return nir_vec(b, dst_comps, num_dst_comps);
build_load_ptr_dxil(nir_builder *b, nir_deref_instr *deref, nir_ssa_def *idx)
return nir_load_ptr_dxil(b, 1, 32, &deref->dest.ssa, idx);
lower_load_deref(nir_builder *b, nir_intrinsic_instr *intr)
assert(intr->dest.is_ssa);
b->cursor = nir_before_instr(&intr->instr);
nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
if (!nir_deref_mode_is(deref, nir_var_shader_temp))
nir_ssa_def *ptr = nir_u2u32(b, nir_build_deref_offset(b, deref, cl_type_size_align));
nir_ssa_def *offset = nir_iand(b, ptr, nir_inot(b, nir_imm_int(b, 3)));
assert(intr->dest.is_ssa);
unsigned num_components = nir_dest_num_components(intr->dest);
unsigned bit_size = nir_dest_bit_size(intr->dest);
unsigned load_size = MAX2(32, bit_size);
unsigned num_bits = num_components * bit_size;
nir_ssa_def *comps[NIR_MAX_VEC_COMPONENTS];
unsigned comp_idx = 0;
nir_deref_path_init(&path, deref, NULL);
nir_ssa_def *base_idx = nir_ishr(b, offset, nir_imm_int(b, 2 /* log2(32 / 8) */));
/* Split loads into 32-bit chunks */
for (unsigned i = 0; i < num_bits; i += load_size) {
unsigned subload_num_bits = MIN2(num_bits - i, load_size);
nir_ssa_def *idx = nir_iadd(b, base_idx, nir_imm_int(b, i / 32));
nir_ssa_def *vec32 = build_load_ptr_dxil(b, path.path[0], idx);
if (load_size == 64) {
idx = nir_iadd(b, idx, nir_imm_int(b, 1));
vec32 = nir_vec2(b, vec32,
build_load_ptr_dxil(b, path.path[0], idx));
/* If we have 2 bytes or less to load we need to adjust the u32 value so
* we can always extract the LSB.
if (subload_num_bits <= 16) {
nir_ssa_def *shift = nir_imul(b, nir_iand(b, ptr, nir_imm_int(b, 3)),
vec32 = nir_ushr(b, vec32, shift);
/* And now comes the pack/unpack step to match the original type. */
nir_ssa_def *temp_vec = nir_extract_bits(b, &vec32, 1, 0, subload_num_bits / bit_size, bit_size);
for (unsigned comp = 0; comp < subload_num_bits / bit_size; ++comp, ++comp_idx)
comps[comp_idx] = nir_channel(b, temp_vec, comp);
nir_deref_path_finish(&path);
assert(comp_idx == num_components);
nir_ssa_def *result = nir_vec(b, comps, num_components);
nir_ssa_def_rewrite_uses(&intr->dest.ssa, result);
nir_instr_remove(&intr->instr);
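/* Illustrative walk-through (assuming the shift factor elided above is the
 * usual (ptr & 3) * 8): a 16-bit load from a shader_temp array at byte offset
 * 6 gives ptr = 6, offset = ptr & ~3 = 4 and base_idx = 4 >> 2 = 1, so a
 * single 32-bit word is loaded from dword slot 1; because only 16 bits are
 * wanted, the word is shifted right by (6 & 3) * 8 = 16 so the payload sits
 * in the LSBs, and nir_extract_bits() then repacks it as one 16-bit
 * component. */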
ubo_load_select_32b_comps(nir_builder *b, nir_ssa_def *vec32,
nir_ssa_def *offset, unsigned alignment)
assert(alignment >= 16 || alignment == 8 ||
alignment == 4 || alignment == 2 ||
assert(vec32->num_components == 4);
nir_ssa_def *comps[4];
for (unsigned i = 0; i < 4; i++)
comps[i] = nir_channel(b, vec32, i);
/* If we have 8bytes alignment or less, select which half the vec4 should
cond = nir_ine(b, nir_iand(b, offset, nir_imm_int(b, 0x8)),
comps[0] = nir_bcsel(b, cond, comps[2], comps[0]);
comps[1] = nir_bcsel(b, cond, comps[3], comps[1]);
return nir_vec(b, comps, 2);
/* 4 byte align or less needed, select which of the 32bit component should be
* used and return it. The sub-32bit split is handled in nir_extract_bits().
cond = nir_ine(b, nir_iand(b, offset, nir_imm_int(b, 0x4)),
return nir_bcsel(b, cond, comps[1], comps[0]);
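/* Example: with 8-byte alignment and a byte offset of 24, bit 0x8 of the
 * offset is set, so the upper half of the 16-byte row (comps[2], comps[3]) is
 * selected and returned as a vec2. With 4-byte (or smaller) alignment and
 * offset 20, bit 0x4 is set and only comps[1] is returned; any sub-32-bit
 * slicing is left to nir_extract_bits() in the caller. */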
build_load_ubo_dxil(nir_builder *b, nir_ssa_def *buffer,
nir_ssa_def *offset, unsigned num_components,
unsigned bit_size, unsigned alignment)
nir_ssa_def *idx = nir_ushr(b, offset, nir_imm_int(b, 4));
nir_ssa_def *comps[NIR_MAX_VEC_COMPONENTS];
unsigned num_bits = num_components * bit_size;
unsigned comp_idx = 0;
/* We need to split loads in 16byte chunks because that's the
* granularity of cBufferLoadLegacy().
for (unsigned i = 0; i < num_bits; i += (16 * 8)) {
/* For each 16byte chunk (or smaller) we generate a 32bit ubo vec
unsigned subload_num_bits = MIN2(num_bits - i, 16 * 8);
nir_load_ubo_dxil(b, 4, 32, buffer, nir_iadd(b, idx, nir_imm_int(b, i / (16 * 8))));
/* First re-arrange the vec32 to account for intra 16-byte offset. */
assert(subload_num_bits / 8 <= alignment);
vec32 = ubo_load_select_32b_comps(b, vec32, offset, alignment);
/* If we have 2 bytes or less to load we need to adjust the u32 value so
* we can always extract the LSB.
if (alignment <= 2) {
nir_ssa_def *shift = nir_imul(b, nir_iand(b, offset,
vec32 = nir_ushr(b, vec32, shift);
/* And now comes the pack/unpack step to match the original type. */
nir_ssa_def *temp_vec = nir_extract_bits(b, &vec32, 1, 0, subload_num_bits / bit_size, bit_size);
for (unsigned comp = 0; comp < subload_num_bits / bit_size; ++comp, ++comp_idx)
comps[comp_idx] = nir_channel(b, temp_vec, comp);
assert(comp_idx == num_components);
return nir_vec(b, comps, num_components);
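/* Example: a 2x32-bit UBO load at byte offset 24 with 8-byte alignment maps
 * to cbuffer row idx = 24 >> 4 = 1. The whole row is fetched as a 4x32-bit
 * vector by nir_load_ubo_dxil(), ubo_load_select_32b_comps() picks
 * components 2-3 because offset bit 0x8 is set, and nir_extract_bits()
 * repacks them into the two requested 32-bit channels. */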
lower_load_ssbo(nir_builder *b, nir_intrinsic_instr *intr, unsigned min_bit_size)
assert(intr->dest.is_ssa);
assert(intr->src[0].is_ssa);
assert(intr->src[1].is_ssa);
b->cursor = nir_before_instr(&intr->instr);
unsigned src_bit_size = nir_dest_bit_size(intr->dest);
unsigned store_bit_size = CLAMP(src_bit_size, min_bit_size, 32);
unsigned offset_mask = store_bit_size / 8 - 1;
nir_ssa_def *buffer = intr->src[0].ssa;
nir_ssa_def *offset = nir_iand(b, intr->src[1].ssa, nir_imm_int(b, ~offset_mask));
enum gl_access_qualifier access = nir_intrinsic_access(intr);
unsigned num_components = nir_dest_num_components(intr->dest);
unsigned num_bits = num_components * src_bit_size;
nir_ssa_def *comps[NIR_MAX_VEC_COMPONENTS];
unsigned comp_idx = 0;
/* We need to split loads in 4-component chunks because that's the optimal
* granularity of bufferLoad(). Minimum alignment is 2-byte.
for (unsigned i = 0; i < num_bits; i += 4 * store_bit_size) {
/* For each 4-component chunk (or smaller) we generate a N-bit ssbo vec load. */
unsigned subload_num_bits = MIN2(num_bits - i, 4 * store_bit_size);
/* The number of components to store depends on the number of bytes. */
nir_ssa_def *result =
nir_load_ssbo(b, DIV_ROUND_UP(subload_num_bits, store_bit_size), store_bit_size,
buffer, nir_iadd(b, offset, nir_imm_int(b, i / 8)),
.align_mul = store_bit_size / 8,
/* If we have an unaligned load we need to adjust the result value so
* we can always extract the LSB.
if (nir_intrinsic_align(intr) < store_bit_size / 8) {
nir_ssa_def *shift = nir_imul(b, nir_iand(b, intr->src[1].ssa, nir_imm_int(b, offset_mask)),
result = nir_ushr(b, result, shift);
/* And now comes the pack/unpack step to match the original type. */
nir_ssa_def *temp_vec = nir_extract_bits(b, &result, 1, 0, subload_num_bits / src_bit_size, src_bit_size);
for (unsigned comp = 0; comp < subload_num_bits / src_bit_size; ++comp, ++comp_idx)
comps[comp_idx] = nir_channel(b, temp_vec, comp);
assert(comp_idx == num_components);
nir_ssa_def *result = nir_vec(b, comps, num_components);
nir_ssa_def_rewrite_uses(&intr->dest.ssa, result);
nir_instr_remove(&intr->instr);
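/* Example: an 8x32-bit SSBO load with store_bit_size == 32 is split into two
 * nir_load_ssbo() of 4 components each, at byte offsets offset + 0 and
 * offset + 16. When the recorded alignment is smaller than store_bit_size / 8,
 * the loaded dwords are shifted right first so the addressed bytes end up in
 * the LSBs before nir_extract_bits() rebuilds the original components. */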
lower_store_ssbo(nir_builder *b, nir_intrinsic_instr *intr, unsigned min_bit_size)
b->cursor = nir_before_instr(&intr->instr);
assert(intr->src[0].is_ssa);
assert(intr->src[1].is_ssa);
assert(intr->src[2].is_ssa);
nir_ssa_def *val = intr->src[0].ssa;
nir_ssa_def *buffer = intr->src[1].ssa;
unsigned src_bit_size = val->bit_size;
unsigned store_bit_size = CLAMP(src_bit_size, min_bit_size, 32);
unsigned masked_store_bit_size = 32;
unsigned num_components = val->num_components;
unsigned num_bits = num_components * src_bit_size;
unsigned offset_mask = store_bit_size / 8 - 1;
unsigned masked_store_offset_mask = masked_store_bit_size / 8 - 1;
nir_ssa_def *offset = nir_iand(b, intr->src[2].ssa, nir_imm_int(b, ~offset_mask));
nir_ssa_def *masked_offset = nir_iand(b, intr->src[2].ssa, nir_imm_int(b, ~masked_store_offset_mask));
nir_ssa_def *comps[NIR_MAX_VEC_COMPONENTS] = { 0 };
unsigned comp_idx = 0;
unsigned write_mask = nir_intrinsic_write_mask(intr);
for (unsigned i = 0; i < num_components; i++)
if (write_mask & (1 << i))
comps[i] = nir_channel(b, val, i);
/* We split stores in 4-component chunks because that's the optimal granularity
* of bufferStore(). Minimum alignment is 2-byte. */
unsigned bit_offset = 0;
/* Skip over holes in the write mask */
while (comp_idx < num_components && comps[comp_idx] == NULL) {
bit_offset += src_bit_size;
if (comp_idx >= num_components)
/* For each 4-component chunk (or smaller) we generate a ssbo vec
* store. If a component is skipped by the write mask, do a smaller
unsigned num_src_comps_stored = 0, substore_num_bits = 0;
while(num_src_comps_stored + comp_idx < num_components &&
substore_num_bits + bit_offset < num_bits &&
substore_num_bits < 4 * store_bit_size &&
comps[comp_idx + num_src_comps_stored]) {
++num_src_comps_stored;
substore_num_bits += src_bit_size;
bool force_masked = false;
if (substore_num_bits > store_bit_size &&
substore_num_bits % store_bit_size != 0) {
/* Split this into two, one unmasked store of the first bits,
* and then the second loop iteration will handle a masked store
assert(num_src_comps_stored == 3);
if (store_bit_size == 16) {
assert(substore_num_bits < 32);
/* If we're already doing atomics to store, just do one
* 32bit masked store instead of a 16bit store and a masked
* store for the other 8 bits. */
--num_src_comps_stored;
substore_num_bits = store_bit_size;
nir_intrinsic_instr *store;
if (substore_num_bits < store_bit_size || force_masked) {
nir_ssa_def *store_vec = load_comps_to_vec(b, src_bit_size, &comps[comp_idx],
num_src_comps_stored, masked_store_bit_size);
nir_ssa_def *mask = nir_imm_intN_t(b, (1 << substore_num_bits) - 1, masked_store_bit_size);
/* If we have small alignments we need to place them correctly in the component. */
if (nir_intrinsic_align(intr) <= masked_store_bit_size / 8) {
nir_ssa_def *pos = nir_iand(b, intr->src[2].ssa, nir_imm_int(b, masked_store_offset_mask));
nir_ssa_def *shift = nir_imul_imm(b, pos, 8);
store_vec = nir_ishl(b, store_vec, shift);
mask = nir_ishl(b, mask, shift);
nir_ssa_def *local_offset = nir_iadd(b, masked_offset, nir_imm_int(b, bit_offset / 8));
store = nir_intrinsic_instr_create(b->shader,
nir_intrinsic_store_ssbo_masked_dxil);
store->src[0] = nir_src_for_ssa(store_vec);
store->src[1] = nir_src_for_ssa(nir_inot(b, mask));
store->src[2] = nir_src_for_ssa(buffer);
store->src[3] = nir_src_for_ssa(local_offset);
nir_ssa_def *local_offset = nir_iadd(b, offset, nir_imm_int(b, bit_offset / 8));
nir_ssa_def *store_vec = load_comps_to_vec(b, src_bit_size, &comps[comp_idx],
num_src_comps_stored, store_bit_size);
store = nir_intrinsic_instr_create(b->shader,
nir_intrinsic_store_ssbo);
store->src[0] = nir_src_for_ssa(store_vec);
store->src[1] = nir_src_for_ssa(buffer);
store->src[2] = nir_src_for_ssa(local_offset);
nir_intrinsic_set_align(store, store_bit_size / 8, 0);
/* The number of components to store depends on the number of bits. */
store->num_components = DIV_ROUND_UP(substore_num_bits, store_bit_size);
nir_builder_instr_insert(b, &store->instr);
comp_idx += num_src_comps_stored;
bit_offset += substore_num_bits;
if (nir_intrinsic_has_write_mask(store))
nir_intrinsic_set_write_mask(store, (1 << store->num_components) - 1);
nir_instr_remove(&intr->instr);
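/* Illustrative case for the masked path: storing a single 16-bit component at
 * byte offset 2 gives masked_offset = 0, pos = 2 and shift = 16, so the value
 * and the 0xffff mask are both shifted into the upper half of the dword and a
 * store_ssbo_masked_dxil is emitted with ~mask as src[1]; the bytes of the
 * dword that fall outside the mask are preserved instead of being clobbered
 * by a plain store_ssbo. */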
lower_load_vec32(nir_builder *b, nir_ssa_def *index, unsigned num_comps, nir_ssa_def **comps, nir_intrinsic_op op)
for (unsigned i = 0; i < num_comps; i++) {
nir_intrinsic_instr *load =
nir_intrinsic_instr_create(b->shader, op);
load->num_components = 1;
load->src[0] = nir_src_for_ssa(nir_iadd(b, index, nir_imm_int(b, i)));
nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, NULL);
nir_builder_instr_insert(b, &load->instr);
comps[i] = &load->dest.ssa;
lower_32b_offset_load(nir_builder *b, nir_intrinsic_instr *intr)
assert(intr->dest.is_ssa);
unsigned bit_size = nir_dest_bit_size(intr->dest);
unsigned num_components = nir_dest_num_components(intr->dest);
unsigned num_bits = num_components * bit_size;
b->cursor = nir_before_instr(&intr->instr);
nir_intrinsic_op op = intr->intrinsic;
lower_32b_offset_load(nir_builder *b, nir_intrinsic_instr *intr, nir_variable *var)
assert(intr->dest.is_ssa);
unsigned bit_size = nir_dest_bit_size(intr->dest);
unsigned num_components = nir_dest_num_components(intr->dest);
unsigned num_bits = num_components * bit_size;
b->cursor = nir_before_instr(&intr->instr);
assert(intr->src[0].is_ssa);
nir_ssa_def *offset = intr->src[0].ssa;
if (op == nir_intrinsic_load_shared) {
offset = nir_iadd(b, offset, nir_imm_int(b, nir_intrinsic_base(intr)));
op = nir_intrinsic_load_shared_dxil;
if (intr->intrinsic == nir_intrinsic_load_shared)
offset = nir_iadd_imm(b, offset, nir_intrinsic_base(intr));
offset = nir_u2u32(b, offset);
op = nir_intrinsic_load_scratch_dxil;
nir_ssa_def *index = nir_ushr(b, offset, nir_imm_int(b, 2));
nir_ssa_def *index = nir_ushr_imm(b, offset, 2);
nir_ssa_def *comps[NIR_MAX_VEC_COMPONENTS];
nir_ssa_def *comps_32bit[NIR_MAX_VEC_COMPONENTS * 2];
lower_store_vec32(nir_builder *b, nir_ssa_def *index, nir_ssa_def *vec32, nir_intrinsic_op op)
for (unsigned i = 0; i < vec32->num_components; i++) {
nir_intrinsic_instr *store =
nir_intrinsic_instr_create(b->shader, op);
store->src[0] = nir_src_for_ssa(nir_channel(b, vec32, i));
store->src[1] = nir_src_for_ssa(nir_iadd(b, index, nir_imm_int(b, i)));
store->num_components = 1;
nir_builder_instr_insert(b, &store->instr);
lower_masked_store_vec32(nir_builder *b, nir_ssa_def *offset, nir_ssa_def *index,
nir_ssa_def *vec32, unsigned num_bits, nir_intrinsic_op op, unsigned alignment)
nir_ssa_def *vec32, unsigned num_bits, nir_variable *var, unsigned alignment)
nir_ssa_def *mask = nir_imm_int(b, (1 << num_bits) - 1);
/* If we have small alignments, we need to place them correctly in the u32 component. */
if (alignment <= 2) {
nir_ssa_def *shift =
nir_imul_imm(b, nir_iand(b, offset, nir_imm_int(b, 3)), 8);
nir_imul_imm(b, nir_iand_imm(b, offset, 3), 8);
vec32 = nir_ishl(b, vec32, shift);
mask = nir_ishl(b, mask, shift);
if (op == nir_intrinsic_store_shared_dxil) {
if (var->data.mode == nir_var_mem_shared) {
/* Use the dedicated masked intrinsic */
nir_store_shared_masked_dxil(b, vec32, nir_inot(b, mask), index);
nir_deref_instr *deref = nir_build_deref_array(b, nir_build_deref_var(b, var), index);
nir_deref_atomic(b, 32, &deref->dest.ssa, nir_inot(b, mask), .atomic_op = nir_atomic_op_iand);
nir_deref_atomic(b, 32, &deref->dest.ssa, vec32, .atomic_op = nir_atomic_op_ior);
/* For scratch, since we don't need atomics, just generate the read-modify-write in NIR */
nir_ssa_def *load = nir_load_scratch_dxil(b, 1, 32, index);
nir_ssa_def *load = nir_load_array_var(b, var, index);
nir_ssa_def *new_val = nir_ior(b, vec32,
nir_inot(b, mask),
lower_store_vec32(b, index, new_val, op);
nir_store_array_var(b, var, index, new_val, 1);
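/* The two variants above differ only in how the read-modify-write is made
 * safe: shared memory may be written concurrently by other invocations, so
 * the partial dword is updated with the dedicated masked intrinsic or an
 * atomic AND/OR pair, while scratch is private to the invocation and a plain
 * load, AND with ~mask, OR and store is enough. E.g. writing one byte at
 * offset 1 uses mask = 0xff shifted to 0xff00, clears those bits in the
 * existing dword and ORs in the new value shifted left by 8. */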
lower_32b_offset_store(nir_builder *b, nir_intrinsic_instr *intr)
lower_32b_offset_store(nir_builder *b, nir_intrinsic_instr *intr, nir_variable *var)
assert(intr->src[0].is_ssa);
unsigned num_components = nir_src_num_components(intr->src[0]);
lower_load_ubo(nir_builder *b, nir_intrinsic_instr *intr)
assert(intr->dest.is_ssa);
flatten_var_arrays(nir_builder *b, nir_instr *instr, void *data)
if (instr->type != nir_instr_type_intrinsic)
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
switch (intr->intrinsic) {
case nir_intrinsic_load_deref:
case nir_intrinsic_store_deref:
case nir_intrinsic_deref_atomic:
case nir_intrinsic_deref_atomic_swap:
nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
nir_variable *var = NULL;
for (nir_deref_instr *d = deref; d; d = nir_deref_instr_parent(d)) {
if (d->deref_type == nir_deref_type_cast)
if (d->deref_type == nir_deref_type_var) {
if (d->type == var->type)
nir_deref_path_init(&path, deref, NULL);
assert(path.path[0]->deref_type == nir_deref_type_var);
b->cursor = nir_before_instr(&path.path[0]->instr);
nir_deref_instr *new_var_deref = nir_build_deref_var(b, var);
nir_ssa_def *index = NULL;
for (unsigned level = 1; path.path[level]; ++level) {
nir_deref_instr *arr_deref = path.path[level];
assert(arr_deref->deref_type == nir_deref_type_array);
b->cursor = nir_before_instr(&arr_deref->instr);
nir_ssa_def *val = nir_imul_imm(b, arr_deref->arr.index.ssa,
glsl_get_component_slots(arr_deref->type));
index = nir_iadd(b, index, val);
unsigned vector_comps = intr->num_components;
if (vector_comps > 1) {
b->cursor = nir_before_instr(instr);
if (intr->intrinsic == nir_intrinsic_load_deref) {
nir_ssa_def *components[NIR_MAX_VEC_COMPONENTS];
for (unsigned i = 0; i < vector_comps; ++i) {
nir_ssa_def *final_index = index ? nir_iadd_imm(b, index, i) : nir_imm_int(b, i);
nir_deref_instr *comp_deref = nir_build_deref_array(b, new_var_deref, final_index);
components[i] = nir_load_deref(b, comp_deref);
nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_vec(b, components, vector_comps));
} else if (intr->intrinsic == nir_intrinsic_store_deref) {
for (unsigned i = 0; i < vector_comps; ++i) {
if (((1 << i) & nir_intrinsic_write_mask(intr)) == 0)
nir_ssa_def *final_index = index ? nir_iadd_imm(b, index, i) : nir_imm_int(b, i);
nir_deref_instr *comp_deref = nir_build_deref_array(b, new_var_deref, final_index);
nir_store_deref(b, comp_deref, nir_channel(b, intr->src[1].ssa, i), 1);
nir_instr_remove(instr);
nir_src_rewrite_ssa(&intr->src[0], &nir_build_deref_array(b, new_var_deref, index)->dest.ssa);
nir_deref_path_finish(&path);
flatten_constant_initializer(nir_variable *var, nir_constant *src, nir_constant ***dest, unsigned vector_elements)
if (src->num_elements == 0) {
for (unsigned i = 0; i < vector_elements; ++i) {
nir_constant *new_scalar = rzalloc(var, nir_constant);
memcpy(&new_scalar->values[0], &src->values[i], sizeof(src->values[0]));
new_scalar->is_null_constant = src->values[i].u64 == 0;
nir_constant **array_entry = (*dest)++;
*array_entry = new_scalar;
for (unsigned i = 0; i < src->num_elements; ++i)
flatten_constant_initializer(var, src->elements[i], dest, vector_elements);
flatten_var_array_types(nir_variable *var)
assert(!glsl_type_is_struct(glsl_without_array(var->type)));
const struct glsl_type *matrix_type = glsl_without_array(var->type);
if (!glsl_type_is_array_of_arrays(var->type) && glsl_get_components(matrix_type) == 1)
enum glsl_base_type base_type = glsl_get_base_type(matrix_type);
const struct glsl_type *flattened_type = glsl_array_type(glsl_scalar_type(base_type),
glsl_get_component_slots(var->type), 0);
var->type = flattened_type;
if (var->constant_initializer) {
nir_constant **new_elements = ralloc_array(var, nir_constant *, glsl_get_length(flattened_type));
nir_constant **temp = new_elements;
flatten_constant_initializer(var, var->constant_initializer, &temp, glsl_get_vector_elements(matrix_type));
var->constant_initializer->num_elements = glsl_get_length(flattened_type);
var->constant_initializer->elements = new_elements;
dxil_nir_flatten_var_arrays(nir_shader *shader, nir_variable_mode modes)
bool progress = false;
nir_foreach_variable_with_modes(var, shader, modes & ~nir_var_function_temp)
progress |= flatten_var_array_types(var);
if (modes & nir_var_function_temp) {
nir_foreach_function(func, shader) {
nir_foreach_function_temp_variable(var, func->impl)
progress |= flatten_var_array_types(var);
nir_shader_instructions_pass(shader, flatten_var_arrays,
nir_metadata_block_index |
nir_metadata_dominance |
nir_metadata_loop_analysis,
nir_remove_dead_derefs(shader);
lower_deref_bit_size(nir_builder *b, nir_instr *instr, void *data)
if (instr->type != nir_instr_type_intrinsic)
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
switch (intr->intrinsic) {
case nir_intrinsic_load_deref:
case nir_intrinsic_store_deref:
/* Atomics can't be smaller than 32-bit */
nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
nir_variable *var = nir_deref_instr_get_variable(deref);
/* Only interested in full deref chains */
const struct glsl_type *var_scalar_type = glsl_without_array(var->type);
if (deref->type == var_scalar_type || !glsl_type_is_scalar(var_scalar_type))
assert(deref->deref_type == nir_deref_type_var || deref->deref_type == nir_deref_type_array);
const struct glsl_type *old_glsl_type = deref->type;
nir_alu_type old_type = nir_get_nir_type_for_glsl_type(old_glsl_type);
nir_alu_type new_type = nir_get_nir_type_for_glsl_type(var_scalar_type);
if (glsl_get_bit_size(old_glsl_type) < glsl_get_bit_size(var_scalar_type)) {
deref->type = var_scalar_type;
if (intr->intrinsic == nir_intrinsic_load_deref) {
intr->dest.ssa.bit_size = glsl_get_bit_size(var_scalar_type);
b->cursor = nir_after_instr(instr);
nir_ssa_def *downcast = nir_type_convert(b, &intr->dest.ssa, new_type, old_type, nir_rounding_mode_undef);
nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, downcast, downcast->parent_instr);
b->cursor = nir_before_instr(instr);
nir_ssa_def *upcast = nir_type_convert(b, intr->src[1].ssa, old_type, new_type, nir_rounding_mode_undef);
nir_src_rewrite_ssa(&intr->src[1], upcast);
while (deref->deref_type == nir_deref_type_array) {
nir_deref_instr *parent = nir_deref_instr_parent(deref);
parent->type = glsl_type_wrap_in_arrays(deref->type, parent->type);
/* Assumed arrays are already flattened */
b->cursor = nir_before_instr(&deref->instr);
nir_deref_instr *parent = nir_build_deref_var(b, var);
if (deref->deref_type == nir_deref_type_array)
deref = nir_build_deref_array(b, parent, nir_imul_imm(b, deref->arr.index.ssa, 2));
deref = nir_build_deref_array_imm(b, parent, 0);
nir_deref_instr *deref2 = nir_build_deref_array(b, parent,
nir_iadd_imm(b, deref->arr.index.ssa, 1));
b->cursor = nir_before_instr(instr);
if (intr->intrinsic == nir_intrinsic_load_deref) {
nir_ssa_def *src1 = nir_load_deref(b, deref);
nir_ssa_def *src2 = nir_load_deref(b, deref2);
nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_pack_64_2x32_split(b, src1, src2));
nir_ssa_def *src1 = nir_unpack_64_2x32_split_x(b, intr->src[1].ssa);
nir_ssa_def *src2 = nir_unpack_64_2x32_split_y(b, intr->src[1].ssa);
nir_store_deref(b, deref, src1, 1);
nir_store_deref(b, deref2, src2, 1);
nir_instr_remove(instr);
lower_var_bit_size_types(nir_variable *var, unsigned min_bit_size, unsigned max_bit_size)
assert(!glsl_type_is_array_of_arrays(var->type) && !glsl_type_is_struct(var->type));
const struct glsl_type *type = glsl_without_array(var->type);
assert(glsl_type_is_scalar(type));
enum glsl_base_type base_type = glsl_get_base_type(type);
if (glsl_base_type_get_bit_size(base_type) < min_bit_size) {
switch (min_bit_size) {
base_type = GLSL_TYPE_UINT16;
for (unsigned i = 0; i < (var->constant_initializer ? var->constant_initializer->num_elements : 0); ++i)
var->constant_initializer->elements[i]->values[0].u16 = var->constant_initializer->elements[i]->values[0].b ? 0xffff : 0;
base_type = GLSL_TYPE_INT16;
for (unsigned i = 0; i < (var->constant_initializer ? var->constant_initializer->num_elements : 0); ++i)
var->constant_initializer->elements[i]->values[0].i16 = var->constant_initializer->elements[i]->values[0].i8;
case GLSL_TYPE_UINT8: base_type = GLSL_TYPE_UINT16; break;
default: unreachable("Unexpected base type");
base_type = GLSL_TYPE_UINT;
for (unsigned i = 0; i < (var->constant_initializer ? var->constant_initializer->num_elements : 0); ++i)
var->constant_initializer->elements[i]->values[0].u32 = var->constant_initializer->elements[i]->values[0].b ? 0xffffffff : 0;
base_type = GLSL_TYPE_INT;
for (unsigned i = 0; i < (var->constant_initializer ? var->constant_initializer->num_elements : 0); ++i)
var->constant_initializer->elements[i]->values[0].i32 = var->constant_initializer->elements[i]->values[0].i8;
case GLSL_TYPE_INT16:
base_type = GLSL_TYPE_INT;
for (unsigned i = 0; i < (var->constant_initializer ? var->constant_initializer->num_elements : 0); ++i)
var->constant_initializer->elements[i]->values[0].i32 = var->constant_initializer->elements[i]->values[0].i16;
case GLSL_TYPE_FLOAT16:
base_type = GLSL_TYPE_FLOAT;
for (unsigned i = 0; i < (var->constant_initializer ? var->constant_initializer->num_elements : 0); ++i)
var->constant_initializer->elements[i]->values[0].f32 = _mesa_half_to_float(var->constant_initializer->elements[i]->values[0].u16);
case GLSL_TYPE_UINT8: base_type = GLSL_TYPE_UINT; break;
case GLSL_TYPE_UINT16: base_type = GLSL_TYPE_UINT; break;
default: unreachable("Unexpected base type");
default: unreachable("Unexpected min bit size");
var->type = glsl_type_wrap_in_arrays(glsl_scalar_type(base_type), var->type);
if (glsl_base_type_get_bit_size(base_type) > max_bit_size) {
assert(!glsl_type_is_array_of_arrays(var->type));
var->type = glsl_array_type(glsl_scalar_type(GLSL_TYPE_UINT),
glsl_type_is_array(var->type) ? glsl_get_length(var->type) * 2 : 2,
if (var->constant_initializer) {
unsigned num_elements = var->constant_initializer->num_elements ?
var->constant_initializer->num_elements * 2 : 2;
nir_constant **element_arr = ralloc_array(var, nir_constant *, num_elements);
nir_constant *elements = rzalloc_array(var, nir_constant, num_elements);
for (unsigned i = 0; i < var->constant_initializer->num_elements; ++i) {
element_arr[i*2] = &elements[i*2];
element_arr[i*2+1] = &elements[i*2+1];
const nir_const_value *src = var->constant_initializer->num_elements ?
var->constant_initializer->elements[i]->values : var->constant_initializer->values;
elements[i*2].values[0].u32 = (uint32_t)src->u64;
elements[i*2].is_null_constant = (uint32_t)src->u64 == 0;
elements[i*2+1].values[0].u32 = (uint32_t)(src->u64 >> 32);
elements[i*2+1].is_null_constant = (uint32_t)(src->u64 >> 32) == 0;
var->constant_initializer->num_elements = num_elements;
var->constant_initializer->elements = element_arr;
dxil_nir_lower_var_bit_size(nir_shader *shader, nir_variable_mode modes,
unsigned min_bit_size, unsigned max_bit_size)
bool progress = false;
nir_foreach_variable_with_modes(var, shader, modes & ~nir_var_function_temp)
progress |= lower_var_bit_size_types(var, min_bit_size, max_bit_size);
if (modes & nir_var_function_temp) {
nir_foreach_function(func, shader) {
nir_foreach_function_temp_variable(var, func->impl)
progress |= lower_var_bit_size_types(var, min_bit_size, max_bit_size);
nir_shader_instructions_pass(shader, lower_deref_bit_size,
nir_metadata_block_index |
nir_metadata_dominance |
nir_metadata_loop_analysis,
nir_remove_dead_derefs(shader);
lower_shared_atomic(nir_builder *b, nir_intrinsic_instr *intr, nir_variable *var)
b->cursor = nir_before_instr(&intr->instr);
assert(intr->src[0].is_ssa);
assert(intr->src[1].is_ssa);
b->cursor = nir_before_instr(&intr->instr);
nir_ssa_def *result =
build_load_ubo_dxil(b, intr->src[0].ssa, intr->src[1].ssa,
nir_dest_num_components(intr->dest),
nir_dest_bit_size(intr->dest),
nir_intrinsic_align(intr));
nir_ssa_def *offset =
nir_iadd_imm(b, intr->src[0].ssa, nir_intrinsic_base(intr));
nir_ssa_def *index = nir_ushr_imm(b, offset, 2);
nir_deref_instr *deref = nir_build_deref_array(b, nir_build_deref_var(b, var), index);
if (intr->intrinsic == nir_intrinsic_shared_atomic_swap)
result = nir_deref_atomic_swap(b, 32, &deref->dest.ssa, intr->src[1].ssa, intr->src[2].ssa,
.atomic_op = nir_intrinsic_atomic_op(intr));
result = nir_deref_atomic(b, 32, &deref->dest.ssa, intr->src[1].ssa,
.atomic_op = nir_intrinsic_atomic_op(intr));
nir_ssa_def_rewrite_uses(&intr->dest.ssa, result);
nir_instr_remove(&intr->instr);
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
switch (intr->intrinsic) {
case nir_intrinsic_load_deref:
progress |= lower_load_deref(&b, intr);
case nir_intrinsic_load_shared:
progress |= lower_32b_offset_load(&b, intr, shared_var);
case nir_intrinsic_load_scratch:
progress |= lower_32b_offset_load(&b, intr);
case nir_intrinsic_load_ssbo:
progress |= lower_load_ssbo(&b, intr, options->use_16bit_ssbo ? 16 : 32);
case nir_intrinsic_load_ubo:
progress |= lower_load_ubo(&b, intr);
progress |= lower_32b_offset_load(&b, intr, scratch_var);
case nir_intrinsic_store_shared:
progress |= lower_32b_offset_store(&b, intr, shared_var);
case nir_intrinsic_store_scratch:
progress |= lower_32b_offset_store(&b, intr);
case nir_intrinsic_store_ssbo:
progress |= lower_store_ssbo(&b, intr, options->use_16bit_ssbo ? 16 : 32);
lower_shared_atomic(nir_builder *b, nir_intrinsic_instr *intr,
nir_intrinsic_op dxil_op)
b->cursor = nir_before_instr(&intr->instr);
assert(intr->src[0].is_ssa);
nir_ssa_def *offset =
nir_iadd(b, intr->src[0].ssa, nir_imm_int(b, nir_intrinsic_base(intr)));
nir_ssa_def *index = nir_ushr(b, offset, nir_imm_int(b, 2));
nir_intrinsic_instr *atomic = nir_intrinsic_instr_create(b->shader, dxil_op);
atomic->src[0] = nir_src_for_ssa(index);
assert(intr->src[1].is_ssa);
atomic->src[1] = nir_src_for_ssa(intr->src[1].ssa);
if (dxil_op == nir_intrinsic_shared_atomic_comp_swap_dxil) {
assert(intr->src[2].is_ssa);
atomic->src[2] = nir_src_for_ssa(intr->src[2].ssa);
atomic->num_components = 0;
nir_ssa_dest_init(&atomic->instr, &atomic->dest, 1, 32, NULL);
nir_builder_instr_insert(b, &atomic->instr);
nir_ssa_def_rewrite_uses(&intr->dest.ssa, &atomic->dest.ssa);
nir_instr_remove(&intr->instr);
dxil_nir_lower_atomics_to_dxil(nir_shader *nir)
bool progress = false;
foreach_list_typed(nir_function, func, node, &nir->functions) {
if (!func->is_entrypoint)
nir_builder_init(&b, func->impl);
nir_foreach_block(block, func->impl) {
nir_foreach_instr_safe(instr, block) {
if (instr->type != nir_instr_type_intrinsic)
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
switch (intr->intrinsic) {
case nir_intrinsic_shared_atomic_##op: \
progress |= lower_shared_atomic(&b, intr, \
nir_intrinsic_shared_atomic_##op##_dxil); \
progress |= lower_32b_offset_store(&b, intr, scratch_var);
case nir_intrinsic_shared_atomic:
case nir_intrinsic_shared_atomic_swap:
progress |= lower_shared_atomic(&b, intr, shared_var);
if (nir->info.stage == MESA_SHADER_KERNEL) {
nir->info.cs.ptr_size = ptr_size;
nir_metadata_block_index | nir_metadata_dominance,
move_consts(nir_builder *b, nir_instr *instr, void *data)
bool progress = false;
switch (instr->type) {
case nir_instr_type_load_const: {
/* Sink load_const to their uses if there's multiple */
nir_load_const_instr *load_const = nir_instr_as_load_const(instr);
if (!list_is_singular(&load_const->def.uses)) {
nir_foreach_use_safe(src, &load_const->def) {
b->cursor = nir_before_src(src);
nir_load_const_instr *new_load = nir_load_const_instr_create(b->shader,
load_const->def.num_components,
load_const->def.bit_size);
memcpy(new_load->value, load_const->value, sizeof(load_const->value[0]) * load_const->def.num_components);
nir_builder_instr_insert(b, &new_load->instr);
nir_src_rewrite_ssa(src, &new_load->def);
/* Sink all consts so that they only have a single use.
* The DXIL backend will already de-dupe the constants to the
* same dxil_value if they have the same type, but this allows a single constant
* to have different types without bitcasts. */
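/* E.g. a single load_const of 0x3f800000 used both as an integer and as the
 * float 1.0 ends up with one copy per use after sinking, so each copy can be
 * emitted with the type its user expects and no bitcast is needed. */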
dxil_nir_move_consts(nir_shader *s)
return nir_shader_instructions_pass(s, move_consts,
nir_metadata_block_index | nir_metadata_dominance,
clear_pass_flags(nir_function_impl *impl)
nir_foreach_block(block, impl) {
nir_foreach_instr(instr, block) {
instr->pass_flags = 0;
add_dest_to_worklist(nir_dest *dest, void *state)
assert(dest->is_ssa);
nir_foreach_use_including_if(src, &dest->ssa) {
assert(src->is_ssa);
nir_if *nif = src->parent_if;
nir_foreach_block_in_cf_node(block, &nif->cf_node) {
nir_foreach_instr(instr, block)
nir_instr_worklist_push_tail(state, instr);
nir_instr_worklist_push_tail(state, src->parent_instr);
set_input_bits(struct dxil_module *mod, nir_intrinsic_instr *intr, BITSET_WORD *input_bits, uint32_t ***tables, const uint32_t **table_sizes)
if (intr->intrinsic == nir_intrinsic_load_view_index) {
BITSET_SET(input_bits, 0);
bool any_bits_set = false;
nir_src *row_src = intr->intrinsic == nir_intrinsic_load_per_vertex_input ? &intr->src[1] : &intr->src[0];
bool is_patch_constant = mod->shader_kind == DXIL_DOMAIN_SHADER && intr->intrinsic == nir_intrinsic_load_input;
const struct dxil_signature_record *sig_rec = is_patch_constant ?
&mod->patch_consts[nir_intrinsic_base(intr)] :
&mod->inputs[mod->input_mappings[nir_intrinsic_base(intr)]];
if (is_patch_constant) {
/* Redirect to the second I/O table */
*tables = *tables + 1;
*table_sizes = *table_sizes + 1;
for (uint32_t component = 0; component < intr->num_components; ++component) {
uint32_t base_element = 0;
uint32_t num_elements = sig_rec->num_elements;
if (nir_src_is_const(*row_src)) {
base_element = (uint32_t)nir_src_as_uint(*row_src);
for (uint32_t element = 0; element < num_elements; ++element) {
uint32_t row = sig_rec->elements[element + base_element].reg;
if (row == 0xffffffff)
BITSET_SET(input_bits, row * 4 + component + nir_intrinsic_component(intr));
any_bits_set = true;
return any_bits_set;
set_output_bits(struct dxil_module *mod, nir_intrinsic_instr *intr, BITSET_WORD *input_bits, uint32_t **tables, const uint32_t *table_sizes)
bool any_bits_set = false;
nir_src *row_src = intr->intrinsic == nir_intrinsic_store_per_vertex_output ? &intr->src[2] : &intr->src[1];
bool is_patch_constant = mod->shader_kind == DXIL_HULL_SHADER && intr->intrinsic == nir_intrinsic_store_output;
const struct dxil_signature_record *sig_rec = is_patch_constant ?
&mod->patch_consts[nir_intrinsic_base(intr)] :
&mod->outputs[nir_intrinsic_base(intr)];
for (uint32_t component = 0; component < intr->num_components; ++component) {
uint32_t base_element = 0;
uint32_t num_elements = sig_rec->num_elements;
if (nir_src_is_const(*row_src)) {
base_element = (uint32_t)nir_src_as_uint(*row_src);
for (uint32_t element = 0; element < num_elements; ++element) {
uint32_t row = sig_rec->elements[element + base_element].reg;
if (row == 0xffffffff)
uint32_t stream = sig_rec->elements[element + base_element].stream;
uint32_t table_idx = is_patch_constant ? 1 : stream;
uint32_t *table = tables[table_idx];
uint32_t output_component = component + nir_intrinsic_component(intr);
uint32_t input_component;
BITSET_FOREACH_SET(input_component, input_bits, 32 * 4) {
uint32_t *table_for_input_component = table + table_sizes[table_idx] * input_component;
BITSET_SET(table_for_input_component, row * 4 + output_component);
any_bits_set = true;
return any_bits_set;
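/* Table layout, as used above: tables[t] is a flat array of bitset rows, one
 * row of table_sizes[t] words per input component index, and bit
 * (row * 4 + component) in a row marks an output register/component that
 * depends on that input. For instance, if input register 1, component 1
 * (input bit 5) feeds output register 2, component 0, the row at
 * tables[t] + table_sizes[t] * 5 gets bit 8 set. */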
propagate_input_to_output_dependencies(struct dxil_module *mod, nir_intrinsic_instr *load_intr, uint32_t **tables, const uint32_t *table_sizes)
/* Which input components are being loaded by this instruction */
BITSET_DECLARE(input_bits, 32 * 4) = { 0 };
if (!set_input_bits(mod, load_intr, input_bits, &tables, &table_sizes))
nir_instr_worklist *worklist = nir_instr_worklist_create();
nir_instr_worklist_push_tail(worklist, &load_intr->instr);
bool any_bits_set = false;
nir_foreach_instr_in_worklist(instr, worklist) {
if (instr->pass_flags)
instr->pass_flags = 1;
nir_foreach_dest(instr, add_dest_to_worklist, worklist);
switch (instr->type) {
case nir_instr_type_jump: {
nir_jump_instr *jump = nir_instr_as_jump(instr);
switch (jump->type) {
case nir_jump_break:
case nir_jump_continue: {
nir_cf_node *parent = &instr->block->cf_node;
while (parent->type != nir_cf_node_loop)
parent = parent->parent;
nir_foreach_block_in_cf_node(block, parent)
nir_foreach_instr(i, block)
nir_instr_worklist_push_tail(worklist, i);
unreachable("Don't expect any other jumps");
case nir_instr_type_intrinsic: {
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
switch (intr->intrinsic) {
case nir_intrinsic_store_output:
case nir_intrinsic_store_per_vertex_output:
any_bits_set |= set_output_bits(mod, intr, input_bits, tables, table_sizes);
/* TODO: Memory writes */
nir_instr_worklist_destroy(worklist);
return any_bits_set;
/* For every input load, compute the set of output stores that it can contribute to.
* If it contributes to a store to memory, or if it's used for control flow, then any
* instruction in the CFG that it impacts is considered to contribute.
* Ideally, we should also handle stores to outputs/memory and then loads from that
* output/memory, but this is non-trivial and unclear how much impact that would have. */
dxil_nir_analyze_io_dependencies(struct dxil_module *mod, nir_shader *s)
bool any_outputs = false;
for (uint32_t i = 0; i < 4; ++i)
any_outputs |= mod->num_psv_outputs[i] > 0;
if (mod->shader_kind == DXIL_HULL_SHADER)
any_outputs |= mod->num_psv_patch_consts > 0;
bool any_bits_set = false;
nir_foreach_function(func, s) {
/* Hull shaders have a patch constant function */
assert(func->is_entrypoint || s->info.stage == MESA_SHADER_TESS_CTRL);
/* Pass 1: input/view ID -> output dependencies */
nir_foreach_block(block, func->impl) {
nir_foreach_instr(instr, block) {
if (instr->type != nir_instr_type_intrinsic)
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
uint32_t **tables = mod->io_dependency_table;
const uint32_t *table_sizes = mod->dependency_table_dwords_per_input;
switch (intr->intrinsic) {
case nir_intrinsic_load_view_index:
tables = mod->viewid_dependency_table;
case nir_intrinsic_load_input:
case nir_intrinsic_load_per_vertex_input:
case nir_intrinsic_load_interpolated_input:
clear_pass_flags(func->impl);
any_bits_set |= propagate_input_to_output_dependencies(mod, intr, tables, table_sizes);
/* Pass 2: output -> output dependencies */
return any_bits_set;
static enum pipe_format
get_format_for_var(unsigned num_comps, enum glsl_base_type sampled_type)
switch (sampled_type) {
case GLSL_TYPE_INT64:
case GLSL_TYPE_INT16:
switch (num_comps) {
case 1: return PIPE_FORMAT_R32_SINT;
case 2: return PIPE_FORMAT_R32G32_SINT;
case 3: return PIPE_FORMAT_R32G32B32_SINT;
case 4: return PIPE_FORMAT_R32G32B32A32_SINT;
default: unreachable("Invalid num_comps");
case GLSL_TYPE_UINT:
case GLSL_TYPE_UINT64:
case GLSL_TYPE_UINT16:
switch (num_comps) {
case 1: return PIPE_FORMAT_R32_UINT;
case 2: return PIPE_FORMAT_R32G32_UINT;
case 3: return PIPE_FORMAT_R32G32B32_UINT;
case 4: return PIPE_FORMAT_R32G32B32A32_UINT;
default: unreachable("Invalid num_comps");
case GLSL_TYPE_FLOAT:
case GLSL_TYPE_FLOAT16:
case GLSL_TYPE_DOUBLE:
switch (num_comps) {
case 1: return PIPE_FORMAT_R32_FLOAT;
case 2: return PIPE_FORMAT_R32G32_FLOAT;
case 3: return PIPE_FORMAT_R32G32B32_FLOAT;
case 4: return PIPE_FORMAT_R32G32B32A32_FLOAT;
default: unreachable("Invalid num_comps");
default: unreachable("Invalid sampler return type");
aoa_size(const struct glsl_type *type)
return glsl_type_is_array(type) ? glsl_get_aoa_size(type) : 1;
guess_image_format_for_var(nir_shader *s, nir_variable *var)
const struct glsl_type *base_type = glsl_without_array(var->type);
if (!glsl_type_is_image(base_type))
if (var->data.image.format != PIPE_FORMAT_NONE)
nir_foreach_function(func, s) {
nir_foreach_block(block, func->impl) {
nir_foreach_instr(instr, block) {
if (instr->type != nir_instr_type_intrinsic)
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
switch (intr->intrinsic) {
case nir_intrinsic_image_deref_load:
case nir_intrinsic_image_deref_store:
case nir_intrinsic_image_deref_atomic:
case nir_intrinsic_image_deref_atomic_swap:
if (nir_intrinsic_get_var(intr, 0) != var)
case nir_intrinsic_image_load:
case nir_intrinsic_image_store:
case nir_intrinsic_image_atomic:
case nir_intrinsic_image_atomic_swap: {
unsigned binding = nir_src_as_uint(intr->src[0]);
if (binding < var->data.binding ||
binding >= var->data.binding + aoa_size(var->type))
switch (intr->intrinsic) {
case nir_intrinsic_image_deref_load:
case nir_intrinsic_image_load:
case nir_intrinsic_image_deref_store:
case nir_intrinsic_image_store:
/* Increase unknown formats up to 4 components if a 4-component accessor is used */
if (intr->num_components > util_format_get_nr_components(var->data.image.format))
var->data.image.format = get_format_for_var(intr->num_components, glsl_get_sampler_result_type(base_type));
/* If an atomic is used, the image format must be 1-component; return immediately */
var->data.image.format = get_format_for_var(1, glsl_get_sampler_result_type(base_type));
/* Dunno what it is, assume 4-component */
if (var->data.image.format == PIPE_FORMAT_NONE)
var->data.image.format = get_format_for_var(4, glsl_get_sampler_result_type(base_type));
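/* In short, the guess is the widest access seen for the variable: loads and
 * stores widen an unknown format up to their component count (R32 ->
 * R32G32 -> ... -> R32G32B32A32 of the sampled type's class), any atomic
 * forces a single-component format, and an image that is never accessed
 * falls back to a 4-component format. */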
update_intrinsic_formats(nir_builder *b, nir_instr *instr, void *data)
if (instr->type != nir_instr_type_intrinsic)
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
if (!nir_intrinsic_has_format(intr))
nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
nir_variable *var = nir_deref_instr_get_variable(deref);
nir_intrinsic_set_format(intr, var->data.image.format);
if (!nir_intrinsic_has_range_base(intr))
unsigned binding = nir_src_as_uint(intr->src[0]);
nir_foreach_variable_with_modes(var, b->shader, nir_var_image) {
if (var->data.binding <= binding &&
var->data.binding + aoa_size(var->type) > binding) {
nir_intrinsic_set_format(intr, var->data.image.format);
dxil_nir_guess_image_formats(nir_shader *s)
bool progress = false;
nir_foreach_variable_with_modes(var, s, nir_var_image) {
progress |= guess_image_format_for_var(s, var);
nir_shader_instructions_pass(s, update_intrinsic_formats, nir_metadata_all, NULL);