/*
 * Copyright © 2012 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */
/**
 * \file lower_ubo_reference.cpp
 *
 * IR lower pass to replace dereferences of variables in a uniform
 * buffer object with usage of ir_binop_ubo_load expressions, each of
 * which can read data up to the size of a vec4.
 *
 * This relieves drivers of the responsibility to deal with tricky UBO
 * layout issues like std140 structures and row_major matrices on
 * their own.
 */
36
#include "lower_buffer_access.h"
37
#include "ir_builder.h"
38
#include "main/macros.h"
39
#include "glsl_parser_extras.h"
40
#include "main/shader_types.h"
42
using namespace ir_builder;
45
class lower_ubo_reference_visitor :
46
public lower_buffer_access::lower_buffer_access {
48
lower_ubo_reference_visitor(struct gl_linked_shader *shader,
49
bool clamp_block_indices,
50
bool use_std430_as_default)
51
: buffer_access_type(ubo_load_access),
52
shader(shader), clamp_block_indices(clamp_block_indices),
53
struct_field(NULL), variable(NULL), uniform_block(NULL),
56
this->use_std430_as_default = use_std430_as_default;
59
void handle_rvalue(ir_rvalue **rvalue);
60
ir_visitor_status visit_enter(ir_assignment *ir);
62
void setup_for_load_or_store(void *mem_ctx,
66
unsigned *const_offset,
68
const glsl_type **matrix_type,
69
enum glsl_interface_packing packing);
70
uint32_t ssbo_access_params();
71
ir_expression *ubo_load(void *mem_ctx, const struct glsl_type *type,
73
ir_call *ssbo_load(void *mem_ctx, const struct glsl_type *type,
76
bool check_for_buffer_array_copy(ir_assignment *ir);
77
bool check_for_buffer_struct_copy(ir_assignment *ir);
78
void check_for_ssbo_store(ir_assignment *ir);
79
void write_to_memory(void *mem_ctx, ir_dereference *deref, ir_variable *var,
80
ir_variable *write_var, unsigned write_mask);
81
ir_call *ssbo_store(void *mem_ctx, ir_rvalue *deref, ir_rvalue *offset,
88
ssbo_unsized_array_length_access,
92
void insert_buffer_access(void *mem_ctx, ir_dereference *deref,
93
const glsl_type *type, ir_rvalue *offset,
94
unsigned mask, int channel);
96
ir_visitor_status visit_enter(class ir_expression *);
97
ir_expression *calculate_ssbo_unsized_array_length(ir_expression *expr);
98
void check_ssbo_unsized_array_length_expression(class ir_expression *);
99
void check_ssbo_unsized_array_length_assignment(ir_assignment *ir);
101
ir_expression *process_ssbo_unsized_array_length(ir_rvalue **,
104
ir_expression *emit_ssbo_get_buffer_size(void *mem_ctx);
106
unsigned calculate_unsized_array_stride(ir_dereference *deref,
107
enum glsl_interface_packing packing);
109
ir_call *lower_ssbo_atomic_intrinsic(ir_call *ir);
110
ir_call *check_for_ssbo_atomic_intrinsic(ir_call *ir);
111
ir_visitor_status visit_enter(ir_call *ir);
112
ir_visitor_status visit_enter(ir_texture *ir);
114
struct gl_linked_shader *shader;
115
bool clamp_block_indices;
116
const struct glsl_struct_field *struct_field;
117
ir_variable *variable;
118
ir_rvalue *uniform_block;
123
* Determine the name of the interface block field
125
* This is the name of the specific member as it would appear in the
126
* \c gl_uniform_buffer_variable::Name field in the shader's
127
* \c UniformBlocks array.
130
interface_field_name(void *mem_ctx, char *base_name, ir_rvalue *d,
131
ir_rvalue **nonconst_block_index)
133
*nonconst_block_index = NULL;
134
char *name_copy = NULL;
135
size_t base_length = 0;
137
/* Loop back through the IR until we find the uniform block */
140
switch (ir->ir_type) {
141
case ir_type_dereference_variable: {
147
case ir_type_dereference_record: {
148
ir_dereference_record *r = (ir_dereference_record *) ir;
149
ir = r->record->as_dereference();
151
/* If we got here it means any previous array subscripts belong to
152
* block members and not the block itself so skip over them in the
159
case ir_type_dereference_array: {
160
ir_dereference_array *a = (ir_dereference_array *) ir;
161
ir = a->array->as_dereference();
165
case ir_type_swizzle: {
166
ir_swizzle *s = (ir_swizzle *) ir;
167
ir = s->val->as_dereference();
168
/* Skip swizzle in the next pass */
174
assert(!"Should not get here.");
180
switch (d->ir_type) {
181
case ir_type_dereference_variable: {
182
ir_dereference_variable *v = (ir_dereference_variable *) d;
183
if (name_copy != NULL &&
184
v->var->is_interface_instance() &&
185
v->var->type->is_array()) {
188
*nonconst_block_index = NULL;
195
case ir_type_dereference_array: {
196
ir_dereference_array *a = (ir_dereference_array *) d;
199
if (name_copy == NULL) {
200
name_copy = ralloc_strdup(mem_ctx, base_name);
201
base_length = strlen(name_copy);
204
/* For arrays of arrays we start at the innermost array and work our
205
* way out so we need to insert the subscript at the base of the
206
* name string rather than just attaching it to the end.
208
new_length = base_length;
209
ir_constant *const_index = a->array_index->as_constant();
210
char *end = ralloc_strdup(NULL, &name_copy[new_length]);
212
ir_rvalue *array_index = a->array_index;
213
if (array_index->type != glsl_type::uint_type)
214
array_index = i2u(array_index);
216
if (a->array->type->is_array() &&
217
a->array->type->fields.array->is_array()) {
218
ir_constant *base_size = new(mem_ctx)
219
ir_constant(a->array->type->fields.array->arrays_of_arrays_size());
220
array_index = mul(array_index, base_size);
223
if (*nonconst_block_index) {
224
*nonconst_block_index = add(*nonconst_block_index, array_index);
226
*nonconst_block_index = array_index;
229
ralloc_asprintf_rewrite_tail(&name_copy, &new_length, "[0]%s",
232
ralloc_asprintf_rewrite_tail(&name_copy, &new_length, "[%d]%s",
233
const_index->get_uint_component(0),
238
d = a->array->as_dereference();
244
assert(!"Should not get here.");
249
assert(!"Should not get here.");
254
clamp_to_array_bounds(void *mem_ctx, ir_rvalue *index, const glsl_type *type)
256
assert(type->is_array());
258
const unsigned array_size = type->arrays_of_arrays_size();
260
ir_constant *max_index = new(mem_ctx) ir_constant(array_size - 1);
261
max_index->type = index->type;
263
ir_constant *zero = new(mem_ctx) ir_constant(0);
264
zero->type = index->type;
266
if (index->type->base_type == GLSL_TYPE_INT)
267
index = max2(index, zero);
268
index = min2(index, max_index);
274
lower_ubo_reference_visitor::setup_for_load_or_store(void *mem_ctx,
278
unsigned *const_offset,
280
const glsl_type **matrix_type,
281
enum glsl_interface_packing packing)
283
/* Determine the name of the interface block */
284
ir_rvalue *nonconst_block_index;
285
const char *const field_name =
286
interface_field_name(mem_ctx, (char *) var->get_interface_type()->name,
287
deref, &nonconst_block_index);
289
if (nonconst_block_index && clamp_block_indices) {
290
nonconst_block_index =
291
clamp_to_array_bounds(mem_ctx, nonconst_block_index, var->type);
294
/* Locate the block by interface name */
296
struct gl_uniform_block **blocks;
297
if (this->buffer_access_type != ubo_load_access) {
298
num_blocks = shader->Program->info.num_ssbos;
299
blocks = shader->Program->sh.ShaderStorageBlocks;
301
num_blocks = shader->Program->info.num_ubos;
302
blocks = shader->Program->sh.UniformBlocks;
304
this->uniform_block = NULL;
305
for (unsigned i = 0; i < num_blocks; i++) {
306
if (strcmp(field_name, blocks[i]->name.string) == 0) {
308
ir_constant *index = new(mem_ctx) ir_constant(i);
310
if (nonconst_block_index) {
311
this->uniform_block = add(nonconst_block_index, index);
313
this->uniform_block = index;
316
if (var->is_interface_instance()) {
319
*const_offset = blocks[i]->Uniforms[var->data.location].Offset;
326
assert(this->uniform_block);
328
this->struct_field = NULL;
329
setup_buffer_access(mem_ctx, deref, offset, const_offset, row_major,
330
matrix_type, &this->struct_field, packing);
334
lower_ubo_reference_visitor::handle_rvalue(ir_rvalue **rvalue)
339
ir_dereference *deref = (*rvalue)->as_dereference();
343
ir_variable *var = deref->variable_referenced();
344
if (!var || !var->is_in_buffer_block())
347
void *mem_ctx = ralloc_parent(shader->ir);
349
ir_rvalue *offset = NULL;
350
unsigned const_offset;
352
const glsl_type *matrix_type;
354
enum glsl_interface_packing packing =
355
var->get_interface_type()->
356
get_internal_ifc_packing(use_std430_as_default);
358
this->buffer_access_type =
359
var->is_in_shader_storage_block() ?
360
ssbo_load_access : ubo_load_access;
361
this->variable = var;
363
/* Compute the offset to the start if the dereference as well as other
364
* information we need to configure the write
366
setup_for_load_or_store(mem_ctx, var, deref,
367
&offset, &const_offset,
368
&row_major, &matrix_type,
372
/* Now that we've calculated the offset to the start of the
373
* dereference, walk over the type and emit loads into a temporary.
375
const glsl_type *type = (*rvalue)->type;
376
ir_variable *load_var = new(mem_ctx) ir_variable(type,
379
base_ir->insert_before(load_var);
381
ir_variable *load_offset = new(mem_ctx) ir_variable(glsl_type::uint_type,
382
"ubo_load_temp_offset",
384
base_ir->insert_before(load_offset);
385
base_ir->insert_before(assign(load_offset, offset));
387
deref = new(mem_ctx) ir_dereference_variable(load_var);
388
emit_access(mem_ctx, false, deref, load_offset, const_offset,
389
row_major, matrix_type, packing, 0);
396
lower_ubo_reference_visitor::ubo_load(void *mem_ctx,
397
const glsl_type *type,
400
ir_rvalue *block_ref = this->uniform_block->clone(mem_ctx, NULL);
402
ir_expression(ir_binop_ubo_load,
410
shader_storage_buffer_object(const _mesa_glsl_parse_state *state)
412
return state->has_shader_storage_buffer_objects();
416
lower_ubo_reference_visitor::ssbo_access_params()
420
if (variable->is_interface_instance()) {
421
assert(struct_field);
423
return ((struct_field->memory_coherent ? ACCESS_COHERENT : 0) |
424
(struct_field->memory_restrict ? ACCESS_RESTRICT : 0) |
425
(struct_field->memory_volatile ? ACCESS_VOLATILE : 0));
427
return ((variable->data.memory_coherent ? ACCESS_COHERENT : 0) |
428
(variable->data.memory_restrict ? ACCESS_RESTRICT : 0) |
429
(variable->data.memory_volatile ? ACCESS_VOLATILE : 0));
434
lower_ubo_reference_visitor::ssbo_store(void *mem_ctx,
439
exec_list sig_params;
441
ir_variable *block_ref = new(mem_ctx)
442
ir_variable(glsl_type::uint_type, "block_ref" , ir_var_function_in);
443
sig_params.push_tail(block_ref);
445
ir_variable *offset_ref = new(mem_ctx)
446
ir_variable(glsl_type::uint_type, "offset" , ir_var_function_in);
447
sig_params.push_tail(offset_ref);
449
ir_variable *val_ref = new(mem_ctx)
450
ir_variable(deref->type, "value" , ir_var_function_in);
451
sig_params.push_tail(val_ref);
453
ir_variable *writemask_ref = new(mem_ctx)
454
ir_variable(glsl_type::uint_type, "write_mask" , ir_var_function_in);
455
sig_params.push_tail(writemask_ref);
457
ir_variable *access_ref = new(mem_ctx)
458
ir_variable(glsl_type::uint_type, "access" , ir_var_function_in);
459
sig_params.push_tail(access_ref);
461
ir_function_signature *sig = new(mem_ctx)
462
ir_function_signature(glsl_type::void_type, shader_storage_buffer_object);
464
sig->replace_parameters(&sig_params);
465
sig->intrinsic_id = ir_intrinsic_ssbo_store;
467
ir_function *f = new(mem_ctx) ir_function("__intrinsic_store_ssbo");
468
f->add_signature(sig);
470
exec_list call_params;
471
call_params.push_tail(this->uniform_block->clone(mem_ctx, NULL));
472
call_params.push_tail(offset->clone(mem_ctx, NULL));
473
call_params.push_tail(deref->clone(mem_ctx, NULL));
474
call_params.push_tail(new(mem_ctx) ir_constant(write_mask));
475
call_params.push_tail(new(mem_ctx) ir_constant(ssbo_access_params()));
476
return new(mem_ctx) ir_call(sig, NULL, &call_params);
480
lower_ubo_reference_visitor::ssbo_load(void *mem_ctx,
481
const struct glsl_type *type,
484
exec_list sig_params;
486
ir_variable *block_ref = new(mem_ctx)
487
ir_variable(glsl_type::uint_type, "block_ref" , ir_var_function_in);
488
sig_params.push_tail(block_ref);
490
ir_variable *offset_ref = new(mem_ctx)
491
ir_variable(glsl_type::uint_type, "offset_ref" , ir_var_function_in);
492
sig_params.push_tail(offset_ref);
494
ir_variable *access_ref = new(mem_ctx)
495
ir_variable(glsl_type::uint_type, "access" , ir_var_function_in);
496
sig_params.push_tail(access_ref);
498
ir_function_signature *sig =
499
new(mem_ctx) ir_function_signature(type, shader_storage_buffer_object);
501
sig->replace_parameters(&sig_params);
502
sig->intrinsic_id = ir_intrinsic_ssbo_load;
504
ir_function *f = new(mem_ctx) ir_function("__intrinsic_load_ssbo");
505
f->add_signature(sig);
507
ir_variable *result = new(mem_ctx)
508
ir_variable(type, "ssbo_load_result", ir_var_temporary);
509
base_ir->insert_before(result);
510
ir_dereference_variable *deref_result = new(mem_ctx)
511
ir_dereference_variable(result);
513
exec_list call_params;
514
call_params.push_tail(this->uniform_block->clone(mem_ctx, NULL));
515
call_params.push_tail(offset->clone(mem_ctx, NULL));
516
call_params.push_tail(new(mem_ctx) ir_constant(ssbo_access_params()));
518
return new(mem_ctx) ir_call(sig, deref_result, &call_params);
522
lower_ubo_reference_visitor::insert_buffer_access(void *mem_ctx,
523
ir_dereference *deref,
524
const glsl_type *type,
529
switch (this->buffer_access_type) {
530
case ubo_load_access:
531
base_ir->insert_before(assign(deref->clone(mem_ctx, NULL),
532
ubo_load(mem_ctx, type, offset),
535
case ssbo_load_access: {
536
ir_call *load_ssbo = ssbo_load(mem_ctx, type, offset);
537
base_ir->insert_before(load_ssbo);
538
ir_rvalue *value = load_ssbo->return_deref->as_rvalue()->clone(mem_ctx, NULL);
539
ir_assignment *assignment =
540
assign(deref->clone(mem_ctx, NULL), value, mask);
541
base_ir->insert_before(assignment);
544
case ssbo_store_access:
546
base_ir->insert_after(ssbo_store(mem_ctx,
547
swizzle(deref, channel, 1),
550
base_ir->insert_after(ssbo_store(mem_ctx, deref, offset, mask));
554
unreachable("invalid buffer_access_type in insert_buffer_access");
559
lower_ubo_reference_visitor::write_to_memory(void *mem_ctx,
560
ir_dereference *deref,
562
ir_variable *write_var,
565
ir_rvalue *offset = NULL;
566
unsigned const_offset;
568
const glsl_type *matrix_type;
570
enum glsl_interface_packing packing =
571
var->get_interface_type()->
572
get_internal_ifc_packing(use_std430_as_default);
574
this->buffer_access_type = ssbo_store_access;
575
this->variable = var;
577
/* Compute the offset to the start if the dereference as well as other
578
* information we need to configure the write
580
setup_for_load_or_store(mem_ctx, var, deref,
581
&offset, &const_offset,
582
&row_major, &matrix_type,
586
/* Now emit writes from the temporary to memory */
587
ir_variable *write_offset =
588
new(mem_ctx) ir_variable(glsl_type::uint_type,
589
"ssbo_store_temp_offset",
592
base_ir->insert_before(write_offset);
593
base_ir->insert_before(assign(write_offset, offset));
595
deref = new(mem_ctx) ir_dereference_variable(write_var);
596
emit_access(mem_ctx, true, deref, write_offset, const_offset,
597
row_major, matrix_type, packing, write_mask);
601
lower_ubo_reference_visitor::visit_enter(ir_expression *ir)
603
check_ssbo_unsized_array_length_expression(ir);
604
return rvalue_visit(ir);
608
lower_ubo_reference_visitor::calculate_ssbo_unsized_array_length(ir_expression *expr)
610
if (expr->operation !=
611
ir_expression_operation(ir_unop_ssbo_unsized_array_length))
614
ir_rvalue *rvalue = expr->operands[0]->as_rvalue();
616
!rvalue->type->is_array() || !rvalue->type->is_unsized_array())
619
ir_dereference *deref = expr->operands[0]->as_dereference();
623
ir_variable *var = expr->operands[0]->variable_referenced();
624
if (!var || !var->is_in_shader_storage_block())
626
return process_ssbo_unsized_array_length(&rvalue, deref, var);
630
lower_ubo_reference_visitor::check_ssbo_unsized_array_length_expression(ir_expression *ir)
633
ir_expression_operation(ir_unop_ssbo_unsized_array_length)) {
634
/* Don't replace this unop if it is found alone. It is going to be
635
* removed by the optimization passes or replaced if it is part of
636
* an ir_assignment or another ir_expression.
641
for (unsigned i = 0; i < ir->num_operands; i++) {
642
if (ir->operands[i]->ir_type != ir_type_expression)
644
ir_expression *expr = (ir_expression *) ir->operands[i];
645
ir_expression *temp = calculate_ssbo_unsized_array_length(expr);
650
ir->operands[i] = temp;
655
lower_ubo_reference_visitor::check_ssbo_unsized_array_length_assignment(ir_assignment *ir)
657
if (!ir->rhs || ir->rhs->ir_type != ir_type_expression)
660
ir_expression *expr = (ir_expression *) ir->rhs;
661
ir_expression *temp = calculate_ssbo_unsized_array_length(expr);
671
lower_ubo_reference_visitor::emit_ssbo_get_buffer_size(void *mem_ctx)
673
ir_rvalue *block_ref = this->uniform_block->clone(mem_ctx, NULL);
674
return new(mem_ctx) ir_expression(ir_unop_get_buffer_size,
680
lower_ubo_reference_visitor::calculate_unsized_array_stride(ir_dereference *deref,
681
enum glsl_interface_packing packing)
683
unsigned array_stride = 0;
685
switch (deref->ir_type) {
686
case ir_type_dereference_variable:
688
ir_dereference_variable *deref_var = (ir_dereference_variable *)deref;
689
const struct glsl_type *unsized_array_type = NULL;
690
/* An unsized array can be sized by other lowering passes, so pick
691
* the first field of the array which has the data type of the unsized
694
unsized_array_type = deref_var->var->type->fields.array;
696
/* Whether or not the field is row-major (because it might be a
697
* bvec2 or something) does not affect the array itself. We need
698
* to know whether an array element in its entirety is row-major.
700
const bool array_row_major =
701
is_dereferenced_thing_row_major(deref_var);
703
if (packing == GLSL_INTERFACE_PACKING_STD430) {
704
array_stride = unsized_array_type->std430_array_stride(array_row_major);
706
array_stride = unsized_array_type->std140_size(array_row_major);
707
array_stride = glsl_align(array_stride, 16);
711
case ir_type_dereference_record:
713
ir_dereference_record *deref_record = (ir_dereference_record *) deref;
714
ir_dereference *interface_deref =
715
deref_record->record->as_dereference();
716
assert(interface_deref != NULL);
717
const struct glsl_type *interface_type = interface_deref->type;
718
unsigned record_length = interface_type->length;
719
/* Unsized array is always the last element of the interface */
720
const struct glsl_type *unsized_array_type =
721
interface_type->fields.structure[record_length - 1].type->fields.array;
723
const bool array_row_major =
724
is_dereferenced_thing_row_major(deref_record);
726
if (packing == GLSL_INTERFACE_PACKING_STD430) {
727
array_stride = unsized_array_type->std430_array_stride(array_row_major);
729
array_stride = unsized_array_type->std140_size(array_row_major);
730
array_stride = glsl_align(array_stride, 16);
735
unreachable("Unsupported dereference type");
741
lower_ubo_reference_visitor::process_ssbo_unsized_array_length(ir_rvalue **rvalue,
742
ir_dereference *deref,
745
void *mem_ctx = ralloc_parent(*rvalue);
747
ir_rvalue *base_offset = NULL;
748
unsigned const_offset;
750
const glsl_type *matrix_type;
752
enum glsl_interface_packing packing =
753
var->get_interface_type()->
754
get_internal_ifc_packing(use_std430_as_default);
755
int unsized_array_stride =
756
calculate_unsized_array_stride(deref, packing);
758
this->buffer_access_type = ssbo_unsized_array_length_access;
759
this->variable = var;
761
/* Compute the offset to the start if the dereference as well as other
762
* information we need to calculate the length.
764
setup_for_load_or_store(mem_ctx, var, deref,
765
&base_offset, &const_offset,
766
&row_major, &matrix_type,
769
* max((buffer_object_size - offset_of_array) / stride_of_array, 0)
771
ir_expression *buffer_size = emit_ssbo_get_buffer_size(mem_ctx);
773
ir_expression *offset_of_array = new(mem_ctx)
774
ir_expression(ir_binop_add, base_offset,
775
new(mem_ctx) ir_constant(const_offset));
776
ir_expression *offset_of_array_int = new(mem_ctx)
777
ir_expression(ir_unop_u2i, offset_of_array);
779
ir_expression *sub = new(mem_ctx)
780
ir_expression(ir_binop_sub, buffer_size, offset_of_array_int);
781
ir_expression *div = new(mem_ctx)
782
ir_expression(ir_binop_div, sub,
783
new(mem_ctx) ir_constant(unsized_array_stride));
784
ir_expression *max = new(mem_ctx)
785
ir_expression(ir_binop_max, div, new(mem_ctx) ir_constant(0));
791
lower_ubo_reference_visitor::check_for_ssbo_store(ir_assignment *ir)
796
ir_rvalue *rvalue = ir->lhs->as_rvalue();
800
ir_dereference *deref = ir->lhs->as_dereference();
804
ir_variable *var = ir->lhs->variable_referenced();
805
if (!var || !var->is_in_shader_storage_block())
808
/* We have a write to a buffer variable, so declare a temporary and rewrite
809
* the assignment so that the temporary is the LHS.
811
void *mem_ctx = ralloc_parent(shader->ir);
813
const glsl_type *type = rvalue->type;
814
ir_variable *write_var = new(mem_ctx) ir_variable(type,
817
base_ir->insert_before(write_var);
818
ir->lhs = new(mem_ctx) ir_dereference_variable(write_var);
820
/* Now we have to write the value assigned to the temporary back to memory */
821
write_to_memory(mem_ctx, deref, var, write_var, ir->write_mask);
826
is_buffer_backed_variable(ir_variable *var)
828
return var->is_in_buffer_block() ||
829
var->data.mode == ir_var_shader_shared;
833
lower_ubo_reference_visitor::check_for_buffer_array_copy(ir_assignment *ir)
835
if (!ir || !ir->lhs || !ir->rhs)
838
/* LHS and RHS must be arrays
839
* FIXME: arrays of arrays?
841
if (!ir->lhs->type->is_array() || !ir->rhs->type->is_array())
844
/* RHS must be a buffer-backed variable. This is what can cause the problem
845
* since it would lead to a series of loads that need to live until we
846
* see the writes to the LHS.
848
ir_variable *rhs_var = ir->rhs->variable_referenced();
849
if (!rhs_var || !is_buffer_backed_variable(rhs_var))
852
/* Split the array copy into individual element copies to reduce
855
ir_dereference *rhs_deref = ir->rhs->as_dereference();
859
ir_dereference *lhs_deref = ir->lhs->as_dereference();
863
assert(lhs_deref->type->length == rhs_deref->type->length);
864
void *mem_ctx = ralloc_parent(shader->ir);
866
for (unsigned i = 0; i < lhs_deref->type->length; i++) {
867
ir_dereference *lhs_i =
868
new(mem_ctx) ir_dereference_array(lhs_deref->clone(mem_ctx, NULL),
869
new(mem_ctx) ir_constant(i));
871
ir_dereference *rhs_i =
872
new(mem_ctx) ir_dereference_array(rhs_deref->clone(mem_ctx, NULL),
873
new(mem_ctx) ir_constant(i));
874
ir->insert_after(assign(lhs_i, rhs_i));
883
lower_ubo_reference_visitor::check_for_buffer_struct_copy(ir_assignment *ir)
885
if (!ir || !ir->lhs || !ir->rhs)
888
/* LHS and RHS must be records */
889
if (!ir->lhs->type->is_struct() || !ir->rhs->type->is_struct())
892
/* RHS must be a buffer-backed variable. This is what can cause the problem
893
* since it would lead to a series of loads that need to live until we
894
* see the writes to the LHS.
896
ir_variable *rhs_var = ir->rhs->variable_referenced();
897
if (!rhs_var || !is_buffer_backed_variable(rhs_var))
900
/* Split the struct copy into individual element copies to reduce
903
ir_dereference *rhs_deref = ir->rhs->as_dereference();
907
ir_dereference *lhs_deref = ir->lhs->as_dereference();
911
assert(lhs_deref->type == rhs_deref->type);
912
void *mem_ctx = ralloc_parent(shader->ir);
914
for (unsigned i = 0; i < lhs_deref->type->length; i++) {
915
const char *field_name = lhs_deref->type->fields.structure[i].name;
916
ir_dereference *lhs_field =
917
new(mem_ctx) ir_dereference_record(lhs_deref->clone(mem_ctx, NULL),
919
ir_dereference *rhs_field =
920
new(mem_ctx) ir_dereference_record(rhs_deref->clone(mem_ctx, NULL),
922
ir->insert_after(assign(lhs_field, rhs_field));
931
lower_ubo_reference_visitor::visit_enter(ir_assignment *ir)
933
/* Array and struct copies could involve large amounts of load/store
934
* operations. To improve register pressure we want to special-case
935
* these and split them into individual element copies.
936
* This way we avoid emitting all the loads for the RHS first and
937
* all the writes for the LHS second and register usage is more
940
if (check_for_buffer_array_copy(ir))
941
return visit_continue_with_parent;
943
if (check_for_buffer_struct_copy(ir))
944
return visit_continue_with_parent;
946
check_ssbo_unsized_array_length_assignment(ir);
947
check_for_ssbo_store(ir);
948
return rvalue_visit(ir);
951
/* Lowers the intrinsic call to a new internal intrinsic that swaps the
952
* access to the buffer variable in the first parameter by an offset
953
* and block index. This involves creating the new internal intrinsic
954
* (i.e. the new function signature).
957
lower_ubo_reference_visitor::lower_ssbo_atomic_intrinsic(ir_call *ir)
959
/* SSBO atomics usually have 2 parameters, the buffer variable and an
960
* integer argument. The exception is CompSwap, that has an additional
963
int param_count = ir->actual_parameters.length();
964
assert(param_count == 2 || param_count == 3);
966
/* First argument must be a scalar integer buffer variable */
967
exec_node *param = ir->actual_parameters.get_head();
968
ir_instruction *inst = (ir_instruction *) param;
969
assert(inst->ir_type == ir_type_dereference_variable ||
970
inst->ir_type == ir_type_dereference_array ||
971
inst->ir_type == ir_type_dereference_record ||
972
inst->ir_type == ir_type_swizzle);
974
ir_rvalue *deref = (ir_rvalue *) inst;
975
assert(deref->type->is_scalar() &&
976
(deref->type->is_integer_32_64() || deref->type->is_float()));
978
ir_variable *var = deref->variable_referenced();
981
/* Compute the offset to the start if the dereference and the
984
void *mem_ctx = ralloc_parent(shader->ir);
986
ir_rvalue *offset = NULL;
987
unsigned const_offset;
989
const glsl_type *matrix_type;
991
enum glsl_interface_packing packing =
992
var->get_interface_type()->
993
get_internal_ifc_packing(use_std430_as_default);
995
this->buffer_access_type = ssbo_atomic_access;
996
this->variable = var;
998
setup_for_load_or_store(mem_ctx, var, deref,
999
&offset, &const_offset,
1000
&row_major, &matrix_type,
1004
assert(matrix_type == NULL);
1006
ir_rvalue *deref_offset =
1007
add(offset, new(mem_ctx) ir_constant(const_offset));
1008
ir_rvalue *block_index = this->uniform_block->clone(mem_ctx, NULL);
1010
/* Create the new internal function signature that will take a block
1011
* index and offset instead of a buffer variable
1013
exec_list sig_params;
1014
ir_variable *sig_param = new(mem_ctx)
1015
ir_variable(glsl_type::uint_type, "block_ref" , ir_var_function_in);
1016
sig_params.push_tail(sig_param);
1018
sig_param = new(mem_ctx)
1019
ir_variable(glsl_type::uint_type, "offset" , ir_var_function_in);
1020
sig_params.push_tail(sig_param);
1022
const glsl_type *type = deref->type->get_scalar_type();
1023
sig_param = new(mem_ctx)
1024
ir_variable(type, "data1", ir_var_function_in);
1025
sig_params.push_tail(sig_param);
1027
if (param_count == 3) {
1028
sig_param = new(mem_ctx)
1029
ir_variable(type, "data2", ir_var_function_in);
1030
sig_params.push_tail(sig_param);
1033
ir_function_signature *sig =
1034
new(mem_ctx) ir_function_signature(deref->type,
1035
shader_storage_buffer_object);
1037
sig->replace_parameters(&sig_params);
1039
assert(ir->callee->intrinsic_id >= ir_intrinsic_generic_load);
1040
assert(ir->callee->intrinsic_id <= ir_intrinsic_generic_atomic_comp_swap);
1041
sig->intrinsic_id = MAP_INTRINSIC_TO_TYPE(ir->callee->intrinsic_id, ssbo);
1044
sprintf(func_name, "%s_ssbo", ir->callee_name());
1045
ir_function *f = new(mem_ctx) ir_function(func_name);
1046
f->add_signature(sig);
1048
/* Now, create the call to the internal intrinsic */
1049
exec_list call_params;
1050
call_params.push_tail(block_index);
1051
call_params.push_tail(deref_offset);
1052
param = ir->actual_parameters.get_head()->get_next();
1053
ir_rvalue *param_as_rvalue = ((ir_instruction *) param)->as_rvalue();
1054
call_params.push_tail(param_as_rvalue->clone(mem_ctx, NULL));
1055
if (param_count == 3) {
1056
param = param->get_next();
1057
param_as_rvalue = ((ir_instruction *) param)->as_rvalue();
1058
call_params.push_tail(param_as_rvalue->clone(mem_ctx, NULL));
1060
ir_dereference_variable *return_deref =
1061
ir->return_deref->clone(mem_ctx, NULL);
1062
return new(mem_ctx) ir_call(sig, return_deref, &call_params);
1066
lower_ubo_reference_visitor::check_for_ssbo_atomic_intrinsic(ir_call *ir)
1068
exec_list& params = ir->actual_parameters;
1070
if (params.length() < 2 || params.length() > 3)
1074
((ir_instruction *) params.get_head())->as_rvalue();
1078
ir_variable *var = rvalue->variable_referenced();
1079
if (!var || !var->is_in_shader_storage_block())
1082
const enum ir_intrinsic_id id = ir->callee->intrinsic_id;
1083
if (id == ir_intrinsic_generic_atomic_add ||
1084
id == ir_intrinsic_generic_atomic_min ||
1085
id == ir_intrinsic_generic_atomic_max ||
1086
id == ir_intrinsic_generic_atomic_and ||
1087
id == ir_intrinsic_generic_atomic_or ||
1088
id == ir_intrinsic_generic_atomic_xor ||
1089
id == ir_intrinsic_generic_atomic_exchange ||
1090
id == ir_intrinsic_generic_atomic_comp_swap) {
1091
return lower_ssbo_atomic_intrinsic(ir);
1099
lower_ubo_reference_visitor::visit_enter(ir_call *ir)
1101
ir_call *new_ir = check_for_ssbo_atomic_intrinsic(ir);
1104
base_ir->replace_with(new_ir);
1105
return visit_continue_with_parent;
1108
return rvalue_visit(ir);
1113
lower_ubo_reference_visitor::visit_enter(ir_texture *ir)
1115
ir_dereference *sampler = ir->sampler;
1117
if (sampler->ir_type == ir_type_dereference_record) {
1118
handle_rvalue((ir_rvalue **)&ir->sampler);
1119
return visit_continue_with_parent;
1122
return rvalue_visit(ir);
1126
} /* unnamed namespace */
1129
lower_ubo_reference(struct gl_linked_shader *shader,
1130
bool clamp_block_indices, bool use_std430_as_default)
1132
lower_ubo_reference_visitor v(shader, clamp_block_indices,
1133
use_std430_as_default);
1135
/* Loop over the instructions lowering references, because we take
1136
* a deref of a UBO array using a UBO dereference as the index will
1137
* produce a collection of instructions all of which have cloned
1138
* UBO dereferences for that array index.
1142
visit_list_elements(&v, shader->ir);
1143
} while (v.progress);