1
/**************************************************************************
 *
 * Copyright 2019 Red Hat.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 **************************************************************************/
26
#include "lp_bld_nir.h"
27
#include "lp_bld_init.h"
28
#include "lp_bld_flow.h"
29
#include "lp_bld_logic.h"
30
#include "lp_bld_gather.h"
31
#include "lp_bld_const.h"
32
#include "lp_bld_struct.h"
33
#include "lp_bld_arit.h"
34
#include "lp_bld_bitarit.h"
35
#include "lp_bld_coro.h"
36
#include "lp_bld_printf.h"
37
#include "lp_bld_intr.h"
38
#include "util/u_cpu_detect.h"
39
#include "util/u_math.h"
41
static int bit_size_to_shift_size(int bit_size)
57
* combine the execution mask if there is one with the current mask.
60
mask_vec(struct lp_build_nir_context *bld_base)
62
struct lp_build_nir_soa_context * bld = (struct lp_build_nir_soa_context *)bld_base;
63
LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
64
struct lp_exec_mask *exec_mask = &bld->exec_mask;
65
LLVMValueRef bld_mask = bld->mask ? lp_build_mask_value(bld->mask) : NULL;
66
if (!exec_mask->has_mask) {
70
return exec_mask->exec_mask;
71
return LLVMBuildAnd(builder, lp_build_mask_value(bld->mask),
72
exec_mask->exec_mask, "");
76
invocation_0_must_be_active(struct lp_build_nir_context *bld_base)
78
struct lp_build_nir_soa_context * bld = (struct lp_build_nir_soa_context *)bld_base;
80
/* Fragment shaders may dispatch with invocation 0 inactive. All other
81
* stages have invocation 0 active at the top. (See
82
* lp_build_tgsi_params.mask setup in draw_llvm.c and lp_state_*.c)
84
if (bld_base->shader->info.stage == MESA_SHADER_FRAGMENT)
87
/* If we're in some control flow right now, then invocation 0 may be
90
if (bld->exec_mask.has_mask)
97
lp_build_zero_bits(struct gallivm_state *gallivm, int bit_size)
100
return LLVMConstInt(LLVMInt64TypeInContext(gallivm->context), 0, 0);
101
else if (bit_size == 16)
102
return LLVMConstInt(LLVMInt16TypeInContext(gallivm->context), 0, 0);
103
else if (bit_size == 8)
104
return LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), 0, 0);
106
return lp_build_const_int32(gallivm, 0);
111
struct lp_build_nir_context * bld_base,
115
struct gallivm_state *gallivm = bld_base->base.gallivm;
116
LLVMBuilderRef builder = gallivm->builder;
119
LLVMValueRef shuffles[2 * (LP_MAX_VECTOR_WIDTH/32)];
120
int len = bld_base->base.type.length * 2;
121
assert(len <= (2 * (LP_MAX_VECTOR_WIDTH/32)));
123
for (i = 0; i < bld_base->base.type.length * 2; i+=2) {
124
#if UTIL_ARCH_LITTLE_ENDIAN
125
shuffles[i] = lp_build_const_int32(gallivm, i / 2);
126
shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
128
shuffles[i] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
129
shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2);
132
res = LLVMBuildShuffleVector(builder, input, input2, LLVMConstVector(shuffles, len), "");
134
return LLVMBuildBitCast(builder, res, bld_base->dbl_bld.vec_type, "");
138
emit_store_64bit_split(struct lp_build_nir_context *bld_base,
140
LLVMValueRef split_values[2])
142
struct gallivm_state *gallivm = bld_base->base.gallivm;
143
LLVMBuilderRef builder = gallivm->builder;
145
LLVMValueRef shuffles[LP_MAX_VECTOR_WIDTH/32];
146
LLVMValueRef shuffles2[LP_MAX_VECTOR_WIDTH/32];
147
int len = bld_base->base.type.length * 2;
149
value = LLVMBuildBitCast(gallivm->builder, value, LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), len), "");
150
for (i = 0; i < bld_base->base.type.length; i++) {
151
#if UTIL_ARCH_LITTLE_ENDIAN
152
shuffles[i] = lp_build_const_int32(gallivm, i * 2);
153
shuffles2[i] = lp_build_const_int32(gallivm, (i * 2) + 1);
155
shuffles[i] = lp_build_const_int32(gallivm, i * 2 + 1);
156
shuffles2[i] = lp_build_const_int32(gallivm, i * 2);
160
split_values[0] = LLVMBuildShuffleVector(builder, value,
161
LLVMGetUndef(LLVMTypeOf(value)),
162
LLVMConstVector(shuffles,
163
bld_base->base.type.length),
165
split_values[1] = LLVMBuildShuffleVector(builder, value,
166
LLVMGetUndef(LLVMTypeOf(value)),
167
LLVMConstVector(shuffles2,
168
bld_base->base.type.length),
173
emit_store_64bit_chan(struct lp_build_nir_context *bld_base,
174
LLVMValueRef chan_ptr,
175
LLVMValueRef chan_ptr2,
178
struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
179
struct lp_build_context *float_bld = &bld_base->base;
180
LLVMValueRef split_vals[2];
182
emit_store_64bit_split(bld_base, value, split_vals);
184
lp_exec_mask_store(&bld->exec_mask, float_bld, split_vals[0], chan_ptr);
185
lp_exec_mask_store(&bld->exec_mask, float_bld, split_vals[1], chan_ptr2);
189
get_soa_array_offsets(struct lp_build_context *uint_bld,
190
LLVMValueRef indirect_index,
193
bool need_perelement_offset)
195
struct gallivm_state *gallivm = uint_bld->gallivm;
196
LLVMValueRef chan_vec =
197
lp_build_const_int_vec(uint_bld->gallivm, uint_bld->type, chan_index);
198
LLVMValueRef length_vec =
199
lp_build_const_int_vec(gallivm, uint_bld->type, uint_bld->type.length);
200
LLVMValueRef index_vec;
202
/* index_vec = (indirect_index * 4 + chan_index) * length + offsets */
203
index_vec = lp_build_mul(uint_bld, indirect_index, lp_build_const_int_vec(uint_bld->gallivm, uint_bld->type, num_components));
204
index_vec = lp_build_add(uint_bld, index_vec, chan_vec);
205
index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
207
if (need_perelement_offset) {
208
LLVMValueRef pixel_offsets;
210
/* build pixel offset vector: {0, 1, 2, 3, ...} */
211
pixel_offsets = uint_bld->undef;
212
for (i = 0; i < uint_bld->type.length; i++) {
213
LLVMValueRef ii = lp_build_const_int32(gallivm, i);
214
pixel_offsets = LLVMBuildInsertElement(gallivm->builder, pixel_offsets,
217
index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);
223
build_gather(struct lp_build_nir_context *bld_base,
224
struct lp_build_context *bld,
225
LLVMValueRef base_ptr,
226
LLVMValueRef indexes,
227
LLVMValueRef overflow_mask,
228
LLVMValueRef indexes2)
230
struct gallivm_state *gallivm = bld_base->base.gallivm;
231
LLVMBuilderRef builder = gallivm->builder;
232
struct lp_build_context *uint_bld = &bld_base->uint_bld;
237
res = LLVMGetUndef(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2));
241
* overflow_mask is a vector telling us which channels
242
* in the vector overflowed. We use the overflow behavior for
243
* constant buffers which is defined as:
244
* Out of bounds access to constant buffer returns 0 in all
245
* components. Out of bounds behavior is always with respect
246
* to the size of the buffer bound at that slot.
251
* We avoid per-element control flow here (also due to llvm going crazy,
252
* though I suspect it's better anyway since overflow is likely rare).
253
* Note that since we still fetch from buffers even if num_elements was
254
* zero (in this case we'll fetch from index zero) the jit func callers
255
* MUST provide valid fake constant buffers of size 4x32 (the values do
256
* not matter), otherwise we'd still need (not per element though)
259
indexes = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes);
261
indexes2 = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes2);
265
* Loop over elements of index_vec, load scalar value, insert it into 'res'.
267
for (i = 0; i < bld->type.length * (indexes2 ? 2 : 1); i++) {
270
LLVMValueRef scalar_ptr, scalar;
272
di = lp_build_const_int32(gallivm, i);
274
si = lp_build_const_int32(gallivm, i >> 1);
278
if (indexes2 && (i & 1)) {
279
index = LLVMBuildExtractElement(builder,
282
index = LLVMBuildExtractElement(builder,
285
scalar_ptr = LLVMBuildGEP(builder, base_ptr,
286
&index, 1, "gather_ptr");
287
scalar = LLVMBuildLoad(builder, scalar_ptr, "");
289
res = LLVMBuildInsertElement(builder, res, scalar, di, "");
294
res = LLVMBuildBitCast(builder, res, bld_base->dbl_bld.vec_type, "");
295
overflow_mask = LLVMBuildSExt(builder, overflow_mask,
296
bld_base->dbl_bld.int_vec_type, "");
297
res = lp_build_select(&bld_base->dbl_bld, overflow_mask,
298
bld_base->dbl_bld.zero, res);
300
res = lp_build_select(bld, overflow_mask, bld->zero, res);
307
* Scatter/store vector.
310
emit_mask_scatter(struct lp_build_nir_soa_context *bld,
311
LLVMValueRef base_ptr,
312
LLVMValueRef indexes,
314
struct lp_exec_mask *mask)
316
struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
317
LLVMBuilderRef builder = gallivm->builder;
319
LLVMValueRef pred = mask->has_mask ? mask->exec_mask : NULL;
322
* Loop over elements of index_vec, store scalar value.
324
for (i = 0; i < bld->bld_base.base.type.length; i++) {
325
LLVMValueRef ii = lp_build_const_int32(gallivm, i);
326
LLVMValueRef index = LLVMBuildExtractElement(builder, indexes, ii, "");
327
LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr, &index, 1, "scatter_ptr");
328
LLVMValueRef val = LLVMBuildExtractElement(builder, values, ii, "scatter_val");
329
LLVMValueRef scalar_pred = pred ?
330
LLVMBuildExtractElement(builder, pred, ii, "scatter_pred") : NULL;
333
lp_build_printf(gallivm, "scatter %d: val %f at %d %p\n",
334
ii, val, index, scalar_ptr);
337
LLVMValueRef real_val, dst_val;
338
dst_val = LLVMBuildLoad(builder, scalar_ptr, "");
339
scalar_pred = LLVMBuildTrunc(builder, scalar_pred, LLVMInt1TypeInContext(gallivm->context), "");
340
real_val = LLVMBuildSelect(builder, scalar_pred, val, dst_val, "");
341
LLVMBuildStore(builder, real_val, scalar_ptr);
344
LLVMBuildStore(builder, val, scalar_ptr);
349
static void emit_load_var(struct lp_build_nir_context *bld_base,
350
nir_variable_mode deref_mode,
351
unsigned num_components,
354
unsigned vertex_index,
355
LLVMValueRef indir_vertex_index,
356
unsigned const_index,
357
LLVMValueRef indir_index,
358
LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
360
struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
361
struct gallivm_state *gallivm = bld_base->base.gallivm;
362
int dmul = bit_size == 64 ? 2 : 1;
363
unsigned location = var->data.driver_location;
364
unsigned location_frac = var->data.location_frac;
366
if (!var->data.compact && !indir_index)
367
location += const_index;
368
else if (var->data.compact) {
369
location += const_index / 4;
370
location_frac += const_index % 4;
373
switch (deref_mode) {
374
case nir_var_shader_in:
375
for (unsigned i = 0; i < num_components; i++) {
376
int idx = (i * dmul) + location_frac;
377
int comp_loc = location;
379
if (bit_size == 64 && idx >= 4) {
385
LLVMValueRef vertex_index_val = lp_build_const_int32(gallivm, vertex_index);
386
LLVMValueRef attrib_index_val = lp_build_const_int32(gallivm, comp_loc);
387
LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx);
388
LLVMValueRef result2;
390
result[i] = bld->gs_iface->fetch_input(bld->gs_iface, &bld_base->base,
391
false, vertex_index_val, 0, attrib_index_val, swizzle_index_val);
392
if (bit_size == 64) {
393
LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx + 1);
394
result2 = bld->gs_iface->fetch_input(bld->gs_iface, &bld_base->base,
395
false, vertex_index_val, 0, attrib_index_val, swizzle_index_val);
396
result[i] = emit_fetch_64bit(bld_base, result[i], result2);
398
} else if (bld->tes_iface) {
399
LLVMValueRef vertex_index_val = lp_build_const_int32(gallivm, vertex_index);
400
LLVMValueRef attrib_index_val;
401
LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx);
402
LLVMValueRef result2;
405
if (var->data.compact) {
406
swizzle_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, idx));
407
attrib_index_val = lp_build_const_int32(gallivm, comp_loc);
409
attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, comp_loc));
411
attrib_index_val = lp_build_const_int32(gallivm, comp_loc);
413
if (var->data.patch) {
414
result[i] = bld->tes_iface->fetch_patch_input(bld->tes_iface, &bld_base->base,
415
indir_index ? true : false, attrib_index_val, swizzle_index_val);
416
if (bit_size == 64) {
417
LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx + 1);
418
result2 = bld->tes_iface->fetch_patch_input(bld->tes_iface, &bld_base->base,
419
indir_index ? true : false, attrib_index_val, swizzle_index_val);
420
result[i] = emit_fetch_64bit(bld_base, result[i], result2);
424
result[i] = bld->tes_iface->fetch_vertex_input(bld->tes_iface, &bld_base->base,
425
indir_vertex_index ? true : false,
426
indir_vertex_index ? indir_vertex_index : vertex_index_val,
427
(indir_index && !var->data.compact) ? true : false, attrib_index_val,
428
(indir_index && var->data.compact) ? true : false, swizzle_index_val);
429
if (bit_size == 64) {
430
LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx + 1);
431
result2 = bld->tes_iface->fetch_vertex_input(bld->tes_iface, &bld_base->base,
432
indir_vertex_index ? true : false,
433
indir_vertex_index ? indir_vertex_index : vertex_index_val,
434
indir_index ? true : false, attrib_index_val, false, swizzle_index_val);
435
result[i] = emit_fetch_64bit(bld_base, result[i], result2);
438
} else if (bld->tcs_iface) {
439
LLVMValueRef vertex_index_val = lp_build_const_int32(gallivm, vertex_index);
440
LLVMValueRef attrib_index_val;
441
LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx);
444
if (var->data.compact) {
445
swizzle_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, idx));
446
attrib_index_val = lp_build_const_int32(gallivm, comp_loc);
448
attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, comp_loc));
450
attrib_index_val = lp_build_const_int32(gallivm, comp_loc);
451
result[i] = bld->tcs_iface->emit_fetch_input(bld->tcs_iface, &bld_base->base,
452
indir_vertex_index ? true : false, indir_vertex_index ? indir_vertex_index : vertex_index_val,
453
(indir_index && !var->data.compact) ? true : false, attrib_index_val,
454
(indir_index && var->data.compact) ? true : false, swizzle_index_val);
455
if (bit_size == 64) {
456
LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx + 1);
457
LLVMValueRef result2 = bld->tcs_iface->emit_fetch_input(bld->tcs_iface, &bld_base->base,
458
indir_vertex_index ? true : false, indir_vertex_index ? indir_vertex_index : vertex_index_val,
459
indir_index ? true : false, attrib_index_val,
460
false, swizzle_index_val);
461
result[i] = emit_fetch_64bit(bld_base, result[i], result2);
465
LLVMValueRef attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, comp_loc));
466
LLVMValueRef index_vec = get_soa_array_offsets(&bld_base->uint_bld,
467
attrib_index_val, 4, idx,
469
LLVMValueRef index_vec2 = NULL;
470
LLVMTypeRef fptr_type;
471
LLVMValueRef inputs_array;
472
fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
473
inputs_array = LLVMBuildBitCast(gallivm->builder, bld->inputs_array, fptr_type, "");
476
index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
477
indir_index, 4, idx + 1, TRUE);
479
/* Gather values from the input register array */
480
result[i] = build_gather(bld_base, &bld_base->base, inputs_array, index_vec, NULL, index_vec2);
482
if (bld->indirects & nir_var_shader_in) {
483
LLVMValueRef lindex = lp_build_const_int32(gallivm,
485
LLVMValueRef input_ptr = lp_build_pointer_get(gallivm->builder,
486
bld->inputs_array, lindex);
487
if (bit_size == 64) {
488
LLVMValueRef lindex2 = lp_build_const_int32(gallivm,
489
comp_loc * 4 + (idx + 1));
490
LLVMValueRef input_ptr2 = lp_build_pointer_get(gallivm->builder,
491
bld->inputs_array, lindex2);
492
result[i] = emit_fetch_64bit(bld_base, input_ptr, input_ptr2);
494
result[i] = input_ptr;
497
if (bit_size == 64) {
499
tmp[0] = bld->inputs[comp_loc][idx];
500
tmp[1] = bld->inputs[comp_loc][idx + 1];
501
result[i] = emit_fetch_64bit(bld_base, tmp[0], tmp[1]);
503
result[i] = bld->inputs[comp_loc][idx];
510
case nir_var_shader_out:
511
if (bld->fs_iface && bld->fs_iface->fb_fetch) {
512
bld->fs_iface->fb_fetch(bld->fs_iface, &bld_base->base, var->data.location, result);
515
for (unsigned i = 0; i < num_components; i++) {
516
int idx = (i * dmul) + location_frac;
517
if (bld->tcs_iface) {
518
LLVMValueRef vertex_index_val = lp_build_const_int32(gallivm, vertex_index);
519
LLVMValueRef attrib_index_val;
520
LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx);
523
attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, var->data.driver_location));
525
attrib_index_val = lp_build_const_int32(gallivm, location);
527
result[i] = bld->tcs_iface->emit_fetch_output(bld->tcs_iface, &bld_base->base,
528
indir_vertex_index ? true : false, indir_vertex_index ? indir_vertex_index : vertex_index_val,
529
(indir_index && !var->data.compact) ? true : false, attrib_index_val,
530
(indir_index && var->data.compact) ? true : false, swizzle_index_val, 0);
531
if (bit_size == 64) {
532
LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx + 1);
533
LLVMValueRef result2 = bld->tcs_iface->emit_fetch_output(bld->tcs_iface, &bld_base->base,
534
indir_vertex_index ? true : false, indir_vertex_index ? indir_vertex_index : vertex_index_val,
535
indir_index ? true : false, attrib_index_val,
536
false, swizzle_index_val, 0);
537
result[i] = emit_fetch_64bit(bld_base, result[i], result2);
547
static void emit_store_chan(struct lp_build_nir_context *bld_base,
548
nir_variable_mode deref_mode,
550
unsigned location, unsigned comp,
554
struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
555
LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
556
struct lp_build_context *float_bld = &bld_base->base;
558
if (bit_size == 64) {
565
emit_store_64bit_chan(bld_base, bld->outputs[location][chan],
566
bld->outputs[location][chan + 1], dst);
568
dst = LLVMBuildBitCast(builder, dst, float_bld->vec_type, "");
569
lp_exec_mask_store(&bld->exec_mask, float_bld, dst,
570
bld->outputs[location][chan + comp]);
574
static void emit_store_tcs_chan(struct lp_build_nir_context *bld_base,
578
unsigned const_index,
579
LLVMValueRef indir_vertex_index,
580
LLVMValueRef indir_index,
583
LLVMValueRef chan_val)
585
struct gallivm_state *gallivm = bld_base->base.gallivm;
586
struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
587
LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
588
unsigned swizzle = chan;
589
if (bit_size == 64) {
598
LLVMValueRef attrib_index_val;
599
LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, swizzle);
603
swizzle_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, swizzle));
604
attrib_index_val = lp_build_const_int32(gallivm, const_index + location);
606
attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, location));
608
attrib_index_val = lp_build_const_int32(gallivm, const_index + location);
609
LLVMValueRef exec_mask = mask_vec(bld_base);
610
if (bit_size == 64) {
611
LLVMValueRef split_vals[2];
612
LLVMValueRef swizzle_index_val2 = lp_build_const_int32(gallivm, swizzle + 1);
613
emit_store_64bit_split(bld_base, chan_val, split_vals);
614
bld->tcs_iface->emit_store_output(bld->tcs_iface, &bld_base->base, 0,
615
indir_vertex_index ? true : false,
617
indir_index ? true : false,
619
false, swizzle_index_val,
620
split_vals[0], exec_mask);
621
bld->tcs_iface->emit_store_output(bld->tcs_iface, &bld_base->base, 0,
622
indir_vertex_index ? true : false,
624
indir_index ? true : false,
626
false, swizzle_index_val2,
627
split_vals[1], exec_mask);
629
chan_val = LLVMBuildBitCast(builder, chan_val, bld_base->base.vec_type, "");
630
bld->tcs_iface->emit_store_output(bld->tcs_iface, &bld_base->base, 0,
631
indir_vertex_index ? true : false,
633
indir_index && !is_compact ? true : false,
635
indir_index && is_compact ? true : false,
637
chan_val, exec_mask);
641
static void emit_store_var(struct lp_build_nir_context *bld_base,
642
nir_variable_mode deref_mode,
643
unsigned num_components,
647
LLVMValueRef indir_vertex_index,
648
unsigned const_index,
649
LLVMValueRef indir_index,
652
struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
653
LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
654
switch (deref_mode) {
655
case nir_var_shader_out: {
656
unsigned location = var->data.driver_location;
657
unsigned comp = var->data.location_frac;
658
if (bld_base->shader->info.stage == MESA_SHADER_FRAGMENT) {
659
if (var->data.location == FRAG_RESULT_STENCIL)
661
else if (var->data.location == FRAG_RESULT_DEPTH)
665
if (var->data.compact) {
666
location += const_index / 4;
667
comp += const_index % 4;
671
for (unsigned chan = 0; chan < num_components; chan++) {
672
if (writemask & (1u << chan)) {
673
LLVMValueRef chan_val = (num_components == 1) ? dst : LLVMBuildExtractValue(builder, dst, chan, "");
674
if (bld->tcs_iface) {
675
emit_store_tcs_chan(bld_base, var->data.compact, bit_size, location, const_index, indir_vertex_index, indir_index, comp, chan, chan_val);
677
emit_store_chan(bld_base, deref_mode, bit_size, location + const_index, comp, chan, chan_val);
687
static LLVMValueRef emit_load_reg(struct lp_build_nir_context *bld_base,
688
struct lp_build_context *reg_bld,
689
const nir_reg_src *reg,
690
LLVMValueRef indir_src,
691
LLVMValueRef reg_storage)
693
struct gallivm_state *gallivm = bld_base->base.gallivm;
694
LLVMBuilderRef builder = gallivm->builder;
695
int nc = reg->reg->num_components;
696
LLVMValueRef vals[NIR_MAX_VEC_COMPONENTS] = { NULL };
697
struct lp_build_context *uint_bld = &bld_base->uint_bld;
698
if (reg->reg->num_array_elems) {
699
LLVMValueRef indirect_val = lp_build_const_int_vec(gallivm, uint_bld->type, reg->base_offset);
701
LLVMValueRef max_index = lp_build_const_int_vec(gallivm, uint_bld->type, reg->reg->num_array_elems - 1);
702
indirect_val = LLVMBuildAdd(builder, indirect_val, indir_src, "");
703
indirect_val = lp_build_min(uint_bld, indirect_val, max_index);
705
reg_storage = LLVMBuildBitCast(builder, reg_storage, LLVMPointerType(reg_bld->elem_type, 0), "");
706
for (unsigned i = 0; i < nc; i++) {
707
LLVMValueRef indirect_offset = get_soa_array_offsets(uint_bld, indirect_val, nc, i, TRUE);
708
vals[i] = build_gather(bld_base, reg_bld, reg_storage, indirect_offset, NULL, NULL);
711
for (unsigned i = 0; i < nc; i++) {
712
LLVMValueRef this_storage = nc == 1 ? reg_storage : lp_build_array_get_ptr(gallivm, reg_storage,
713
lp_build_const_int32(gallivm, i));
714
vals[i] = LLVMBuildLoad(builder, this_storage, "");
717
return nc == 1 ? vals[0] : lp_nir_array_build_gather_values(builder, vals, nc);
720
static void emit_store_reg(struct lp_build_nir_context *bld_base,
721
struct lp_build_context *reg_bld,
722
const nir_reg_dest *reg,
724
LLVMValueRef indir_src,
725
LLVMValueRef reg_storage,
726
LLVMValueRef dst[NIR_MAX_VEC_COMPONENTS])
728
struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
729
struct gallivm_state *gallivm = bld_base->base.gallivm;
730
LLVMBuilderRef builder = gallivm->builder;
731
struct lp_build_context *uint_bld = &bld_base->uint_bld;
732
int nc = reg->reg->num_components;
733
if (reg->reg->num_array_elems > 0) {
734
LLVMValueRef indirect_val = lp_build_const_int_vec(gallivm, uint_bld->type, reg->base_offset);
736
LLVMValueRef max_index = lp_build_const_int_vec(gallivm, uint_bld->type, reg->reg->num_array_elems - 1);
737
indirect_val = LLVMBuildAdd(builder, indirect_val, indir_src, "");
738
indirect_val = lp_build_min(uint_bld, indirect_val, max_index);
740
reg_storage = LLVMBuildBitCast(builder, reg_storage, LLVMPointerType(reg_bld->elem_type, 0), "");
741
for (unsigned i = 0; i < nc; i++) {
742
if (!(writemask & (1 << i)))
744
LLVMValueRef indirect_offset = get_soa_array_offsets(uint_bld, indirect_val, nc, i, TRUE);
745
dst[i] = LLVMBuildBitCast(builder, dst[i], reg_bld->vec_type, "");
746
emit_mask_scatter(bld, reg_storage, indirect_offset, dst[i], &bld->exec_mask);
751
for (unsigned i = 0; i < nc; i++) {
752
LLVMValueRef this_storage = nc == 1 ? reg_storage : lp_build_array_get_ptr(gallivm, reg_storage,
753
lp_build_const_int32(gallivm, i));
754
dst[i] = LLVMBuildBitCast(builder, dst[i], reg_bld->vec_type, "");
755
lp_exec_mask_store(&bld->exec_mask, reg_bld, dst[i], this_storage);
759
static void emit_load_kernel_arg(struct lp_build_nir_context *bld_base,
762
unsigned offset_bit_size,
763
bool offset_is_uniform,
765
LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
767
struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
768
struct gallivm_state *gallivm = bld_base->base.gallivm;
769
LLVMBuilderRef builder = gallivm->builder;
770
struct lp_build_context *bld_broad = get_int_bld(bld_base, true, bit_size);
771
LLVMValueRef kernel_args_ptr = bld->kernel_args_ptr;
772
unsigned size_shift = bit_size_to_shift_size(bit_size);
773
struct lp_build_context *bld_offset = get_int_bld(bld_base, true, offset_bit_size);
775
offset = lp_build_shr(bld_offset, offset, lp_build_const_int_vec(gallivm, bld_offset->type, size_shift));
777
LLVMTypeRef ptr_type = LLVMPointerType(bld_broad->elem_type, 0);
778
kernel_args_ptr = LLVMBuildBitCast(builder, kernel_args_ptr, ptr_type, "");
780
if (!invocation_0_must_be_active(bld_base)) {
781
mesa_logw_once("Treating load_kernel_arg in control flow as uniform, results may be incorrect.");
784
if (offset_is_uniform) {
785
offset = LLVMBuildExtractElement(builder, offset, lp_build_const_int32(gallivm, 0), "");
787
for (unsigned c = 0; c < nc; c++) {
788
LLVMValueRef this_offset = LLVMBuildAdd(builder, offset, offset_bit_size == 64 ? lp_build_const_int64(gallivm, c) : lp_build_const_int32(gallivm, c), "");
790
LLVMValueRef scalar = lp_build_pointer_get(builder, kernel_args_ptr, this_offset);
791
result[c] = lp_build_broadcast_scalar(bld_broad, scalar);
796
static LLVMValueRef global_addr_to_ptr(struct gallivm_state *gallivm, LLVMValueRef addr_ptr, unsigned bit_size)
798
LLVMBuilderRef builder = gallivm->builder;
801
addr_ptr = LLVMBuildIntToPtr(builder, addr_ptr, LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0), "");
804
addr_ptr = LLVMBuildIntToPtr(builder, addr_ptr, LLVMPointerType(LLVMInt16TypeInContext(gallivm->context), 0), "");
808
addr_ptr = LLVMBuildIntToPtr(builder, addr_ptr, LLVMPointerType(LLVMInt32TypeInContext(gallivm->context), 0), "");
811
addr_ptr = LLVMBuildIntToPtr(builder, addr_ptr, LLVMPointerType(LLVMInt64TypeInContext(gallivm->context), 0), "");
817
static void emit_load_global(struct lp_build_nir_context *bld_base,
820
unsigned addr_bit_size,
821
bool offset_is_uniform,
823
LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS])
825
struct gallivm_state *gallivm = bld_base->base.gallivm;
826
LLVMBuilderRef builder = gallivm->builder;
827
struct lp_build_context *uint_bld = &bld_base->uint_bld;
828
struct lp_build_context *res_bld;
829
LLVMValueRef exec_mask = mask_vec(bld_base);
831
res_bld = get_int_bld(bld_base, true, bit_size);
833
if (offset_is_uniform && invocation_0_must_be_active(bld_base)) {
834
/* If the offset is uniform, then use the address from invocation 0 to
835
* load, and broadcast to all invocations.
837
LLVMValueRef addr_ptr = LLVMBuildExtractElement(gallivm->builder, addr,
838
lp_build_const_int32(gallivm, 0), "");
839
addr_ptr = global_addr_to_ptr(gallivm, addr_ptr, bit_size);
841
for (unsigned c = 0; c < nc; c++) {
842
LLVMValueRef scalar = lp_build_pointer_get(builder, addr_ptr, lp_build_const_int32(gallivm, c));
843
outval[c] = lp_build_broadcast_scalar(res_bld, scalar);
848
for (unsigned c = 0; c < nc; c++) {
849
LLVMValueRef result = lp_build_alloca(gallivm, res_bld->vec_type, "");
850
struct lp_build_loop_state loop_state;
851
lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
853
struct lp_build_if_state ifthen;
854
LLVMValueRef cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
855
cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
856
lp_build_if(&ifthen, gallivm, cond);
858
LLVMValueRef addr_ptr = LLVMBuildExtractElement(gallivm->builder, addr,
859
loop_state.counter, "");
860
addr_ptr = global_addr_to_ptr(gallivm, addr_ptr, bit_size);
862
LLVMValueRef value_ptr = lp_build_pointer_get(builder, addr_ptr, lp_build_const_int32(gallivm, c));
864
LLVMValueRef temp_res;
865
temp_res = LLVMBuildLoad(builder, result, "");
866
temp_res = LLVMBuildInsertElement(builder, temp_res, value_ptr, loop_state.counter, "");
867
LLVMBuildStore(builder, temp_res, result);
868
lp_build_endif(&ifthen);
869
lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
871
outval[c] = LLVMBuildLoad(builder, result, "");
875
static void emit_store_global(struct lp_build_nir_context *bld_base,
877
unsigned nc, unsigned bit_size,
878
unsigned addr_bit_size,
882
struct gallivm_state *gallivm = bld_base->base.gallivm;
883
LLVMBuilderRef builder = gallivm->builder;
884
struct lp_build_context *uint_bld = &bld_base->uint_bld;
885
LLVMValueRef exec_mask = mask_vec(bld_base);
887
for (unsigned c = 0; c < nc; c++) {
888
if (!(writemask & (1u << c)))
890
LLVMValueRef val = (nc == 1) ? dst : LLVMBuildExtractValue(builder, dst, c, "");
892
struct lp_build_loop_state loop_state;
893
lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
894
LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, val,
895
loop_state.counter, "");
897
LLVMValueRef addr_ptr = LLVMBuildExtractElement(gallivm->builder, addr,
898
loop_state.counter, "");
899
addr_ptr = global_addr_to_ptr(gallivm, addr_ptr, bit_size);
902
value_ptr = LLVMBuildBitCast(builder, value_ptr, LLVMInt8TypeInContext(gallivm->context), "");
905
value_ptr = LLVMBuildBitCast(builder, value_ptr, LLVMInt16TypeInContext(gallivm->context), "");
908
value_ptr = LLVMBuildBitCast(builder, value_ptr, LLVMInt32TypeInContext(gallivm->context), "");
911
value_ptr = LLVMBuildBitCast(builder, value_ptr, LLVMInt64TypeInContext(gallivm->context), "");
916
struct lp_build_if_state ifthen;
918
LLVMValueRef cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
919
cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
920
lp_build_if(&ifthen, gallivm, cond);
921
lp_build_pointer_set(builder, addr_ptr, lp_build_const_int32(gallivm, c), value_ptr);
922
lp_build_endif(&ifthen);
923
lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
928
static void emit_atomic_global(struct lp_build_nir_context *bld_base,
929
nir_intrinsic_op nir_op,
930
unsigned addr_bit_size,
931
unsigned val_bit_size,
933
LLVMValueRef val, LLVMValueRef val2,
934
LLVMValueRef *result)
936
struct gallivm_state *gallivm = bld_base->base.gallivm;
937
LLVMBuilderRef builder = gallivm->builder;
938
struct lp_build_context *uint_bld = &bld_base->uint_bld;
939
struct lp_build_context *atom_bld = get_int_bld(bld_base, true, val_bit_size);
940
LLVMValueRef atom_res = lp_build_alloca(gallivm,
941
LLVMTypeOf(val), "");
942
LLVMValueRef exec_mask = mask_vec(bld_base);
943
struct lp_build_loop_state loop_state;
944
lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
946
LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, val,
947
loop_state.counter, "");
949
LLVMValueRef addr_ptr = LLVMBuildExtractElement(gallivm->builder, addr,
950
loop_state.counter, "");
951
addr_ptr = global_addr_to_ptr(gallivm, addr_ptr, 32);
952
struct lp_build_if_state ifthen;
953
LLVMValueRef cond, temp_res;
955
cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
956
cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
957
lp_build_if(&ifthen, gallivm, cond);
959
addr_ptr = LLVMBuildBitCast(gallivm->builder, addr_ptr, LLVMPointerType(LLVMTypeOf(value_ptr), 0), "");
960
if (nir_op == nir_intrinsic_global_atomic_comp_swap) {
961
LLVMValueRef cas_src_ptr = LLVMBuildExtractElement(gallivm->builder, val2,
962
loop_state.counter, "");
963
cas_src_ptr = LLVMBuildBitCast(gallivm->builder, cas_src_ptr, atom_bld->elem_type, "");
964
scalar = LLVMBuildAtomicCmpXchg(builder, addr_ptr, value_ptr,
966
LLVMAtomicOrderingSequentiallyConsistent,
967
LLVMAtomicOrderingSequentiallyConsistent,
969
scalar = LLVMBuildExtractValue(gallivm->builder, scalar, 0, "");
971
LLVMAtomicRMWBinOp op;
973
case nir_intrinsic_global_atomic_add:
974
op = LLVMAtomicRMWBinOpAdd;
976
case nir_intrinsic_global_atomic_exchange:
978
op = LLVMAtomicRMWBinOpXchg;
980
case nir_intrinsic_global_atomic_and:
981
op = LLVMAtomicRMWBinOpAnd;
983
case nir_intrinsic_global_atomic_or:
984
op = LLVMAtomicRMWBinOpOr;
986
case nir_intrinsic_global_atomic_xor:
987
op = LLVMAtomicRMWBinOpXor;
989
case nir_intrinsic_global_atomic_umin:
990
op = LLVMAtomicRMWBinOpUMin;
992
case nir_intrinsic_global_atomic_umax:
993
op = LLVMAtomicRMWBinOpUMax;
995
case nir_intrinsic_global_atomic_imin:
996
op = LLVMAtomicRMWBinOpMin;
998
case nir_intrinsic_global_atomic_imax:
999
op = LLVMAtomicRMWBinOpMax;
1002
unreachable("unknown atomic op");
1005
scalar = LLVMBuildAtomicRMW(builder, op,
1006
addr_ptr, value_ptr,
1007
LLVMAtomicOrderingSequentiallyConsistent,
1010
temp_res = LLVMBuildLoad(builder, atom_res, "");
1011
temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, loop_state.counter, "");
1012
LLVMBuildStore(builder, temp_res, atom_res);
1013
lp_build_else(&ifthen);
1014
temp_res = LLVMBuildLoad(builder, atom_res, "");
1015
bool is_float = LLVMTypeOf(val) == bld_base->base.vec_type;
1016
LLVMValueRef zero_val;
1018
if (val_bit_size == 64)
1019
zero_val = lp_build_const_double(gallivm, 0);
1021
zero_val = lp_build_const_float(gallivm, 0);
1023
if (val_bit_size == 64)
1024
zero_val = lp_build_const_int64(gallivm, 0);
1026
zero_val = lp_build_const_int32(gallivm, 0);
1029
temp_res = LLVMBuildInsertElement(builder, temp_res, zero_val, loop_state.counter, "");
1030
LLVMBuildStore(builder, temp_res, atom_res);
1031
lp_build_endif(&ifthen);
1032
lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
1034
*result = LLVMBuildLoad(builder, atom_res, "");
1037
static void emit_load_ubo(struct lp_build_nir_context *bld_base,
1040
bool offset_is_uniform,
1042
LLVMValueRef offset,
1043
LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
1045
struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1046
struct gallivm_state *gallivm = bld_base->base.gallivm;
1047
LLVMBuilderRef builder = gallivm->builder;
1048
struct lp_build_context *uint_bld = &bld_base->uint_bld;
1049
struct lp_build_context *bld_broad = get_int_bld(bld_base, true, bit_size);
1050
LLVMValueRef consts_ptr = lp_build_array_get(gallivm, bld->consts_ptr, index);
1051
LLVMValueRef num_consts = lp_build_array_get(gallivm, bld->const_sizes_ptr, index);
1052
unsigned size_shift = bit_size_to_shift_size(bit_size);
1054
offset = lp_build_shr(uint_bld, offset, lp_build_const_int_vec(gallivm, uint_bld->type, size_shift));
1056
LLVMTypeRef ptr_type = LLVMPointerType(bld_broad->elem_type, 0);
1057
consts_ptr = LLVMBuildBitCast(builder, consts_ptr, ptr_type, "");
1059
if (offset_is_uniform && invocation_0_must_be_active(bld_base)) {
1060
offset = LLVMBuildExtractElement(builder, offset, lp_build_const_int32(gallivm, 0), "");
1061
struct lp_build_context *load_bld = get_int_bld(bld_base, true, bit_size);
1064
num_consts = LLVMBuildShl(gallivm->builder, num_consts, lp_build_const_int32(gallivm, 2), "");
1067
num_consts = LLVMBuildShl(gallivm->builder, num_consts, lp_build_const_int32(gallivm, 1), "");
1070
num_consts = LLVMBuildLShr(gallivm->builder, num_consts, lp_build_const_int32(gallivm, 1), "");
1074
for (unsigned c = 0; c < nc; c++) {
1075
LLVMValueRef chan_offset = LLVMBuildAdd(builder, offset, lp_build_const_int32(gallivm, c), "");
1077
LLVMValueRef scalar;
1078
/* If loading outside the UBO, we need to skip the load and read 0 instead. */
1079
LLVMValueRef zero = lp_build_zero_bits(gallivm, bit_size);
1080
LLVMValueRef res_store = lp_build_alloca(gallivm, LLVMTypeOf(zero), "");
1081
LLVMBuildStore(builder, zero, res_store);
1083
LLVMValueRef fetch_extent = LLVMBuildAdd(builder, chan_offset, lp_build_const_int32(gallivm, 1), "");
1084
LLVMValueRef fetch_cond = LLVMBuildICmp(gallivm->builder, LLVMIntUGE, num_consts, fetch_extent, "");
1085
LLVMValueRef fetch_cond2 = LLVMBuildICmp(gallivm->builder, LLVMIntSGE, chan_offset, lp_build_const_int32(gallivm, 0), "");
1086
LLVMValueRef fetch_cond_final = LLVMBuildAnd(gallivm->builder, fetch_cond, fetch_cond2, "");
1087
struct lp_build_if_state ifthen;
1088
lp_build_if(&ifthen, gallivm, fetch_cond_final);
1089
LLVMBuildStore(builder, lp_build_pointer_get(builder, consts_ptr, chan_offset), res_store);
1090
lp_build_endif(&ifthen);
1092
scalar = LLVMBuildLoad(builder, res_store, "");
1094
result[c] = lp_build_broadcast_scalar(load_bld, scalar);
1097
LLVMValueRef overflow_mask;
1099
num_consts = lp_build_broadcast_scalar(uint_bld, num_consts);
1101
num_consts = lp_build_shr_imm(uint_bld, num_consts, 1);
1102
else if (bit_size == 16)
1103
num_consts = lp_build_shl_imm(uint_bld, num_consts, 1);
1104
else if (bit_size == 8)
1105
num_consts = lp_build_shl_imm(uint_bld, num_consts, 2);
1107
for (unsigned c = 0; c < nc; c++) {
1108
LLVMValueRef this_offset = lp_build_add(uint_bld, offset, lp_build_const_int_vec(gallivm, uint_bld->type, c));
1109
overflow_mask = lp_build_compare(gallivm, uint_bld->type, PIPE_FUNC_GEQUAL,
1110
this_offset, num_consts);
1111
result[c] = build_gather(bld_base, bld_broad, consts_ptr, this_offset, overflow_mask, NULL);
1117
emit_load_const(struct lp_build_nir_context *bld_base,
1118
const nir_load_const_instr *instr,
1119
LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS])
1121
struct lp_build_context *int_bld = get_int_bld(bld_base, true, instr->def.bit_size);
1122
for (unsigned i = 0; i < instr->def.num_components; i++)
1123
outval[i] = lp_build_const_int_vec(bld_base->base.gallivm, int_bld->type, instr->def.bit_size == 32 ? instr->value[i].u32 : instr->value[i].u64);
1124
memset(&outval[instr->def.num_components], 0, NIR_MAX_VEC_COMPONENTS - instr->def.num_components);
1128
* Get the base address of SSBO[@index] for the @invocation channel, returning
1129
* the address and also the bounds (in units of the bit_size).
1132
ssbo_base_pointer(struct lp_build_nir_context *bld_base,
1134
LLVMValueRef index, LLVMValueRef invocation, LLVMValueRef *bounds)
1136
struct gallivm_state *gallivm = bld_base->base.gallivm;
1137
struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1138
uint32_t shift_val = bit_size_to_shift_size(bit_size);
1140
LLVMValueRef ssbo_idx = LLVMBuildExtractElement(gallivm->builder, index, invocation, "");
1141
LLVMValueRef ssbo_size_ptr = lp_build_array_get(gallivm, bld->ssbo_sizes_ptr, ssbo_idx);
1142
LLVMValueRef ssbo_ptr = lp_build_array_get(gallivm, bld->ssbo_ptr, ssbo_idx);
1144
*bounds = LLVMBuildAShr(gallivm->builder, ssbo_size_ptr, lp_build_const_int32(gallivm, shift_val), "");
1150
/* Return a typed base pointer for an SSBO (index != NULL) or shared-memory
 * (index == NULL) access, along with optional bounds for SSBOs.
 */
static LLVMValueRef
mem_access_base_pointer(struct lp_build_nir_context *bld_base,
                        struct lp_build_context *mem_bld,
                        unsigned bit_size,
                        LLVMValueRef index, LLVMValueRef invocation, LLVMValueRef *bounds)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
   LLVMValueRef ptr;

   if (index) {
      ptr = ssbo_base_pointer(bld_base, bit_size, index, invocation, bounds);
   } else {
      ptr = bld->shared_ptr;
      /* Shared memory is not bounds-checked here. */
      *bounds = NULL;
   }

   /* Cast it to the pointer type of the access this instruction is doing. */
   return LLVMBuildBitCast(gallivm->builder, ptr, LLVMPointerType(mem_bld->elem_type, 0), "");
}
static void emit_load_mem(struct lp_build_nir_context *bld_base,
1176
bool index_and_offset_are_uniform,
1178
LLVMValueRef offset,
1179
LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS])
1181
struct gallivm_state *gallivm = bld_base->base.gallivm;
1182
struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1183
LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
1184
struct lp_build_context *uint_bld = &bld_base->uint_bld;
1185
struct lp_build_context *load_bld;
1186
uint32_t shift_val = bit_size_to_shift_size(bit_size);
1188
load_bld = get_int_bld(bld_base, true, bit_size);
1190
offset = LLVMBuildAShr(gallivm->builder, offset, lp_build_const_int_vec(gallivm, uint_bld->type, shift_val), "");
1192
/* If the address is uniform, then use the address from invocation 0 to load,
1193
* and broadcast to all invocations.
1195
if (index_and_offset_are_uniform && invocation_0_must_be_active(bld_base)) {
1196
LLVMValueRef ssbo_limit;
1197
LLVMValueRef mem_ptr = mem_access_base_pointer(bld_base, load_bld, bit_size, index,
1198
lp_build_const_int32(gallivm, 0), &ssbo_limit);
1200
offset = LLVMBuildExtractElement(gallivm->builder, offset, lp_build_const_int32(gallivm, 0), "");
1202
for (unsigned c = 0; c < nc; c++) {
1203
LLVMValueRef chan_offset = LLVMBuildAdd(builder, offset, lp_build_const_int32(gallivm, c), "");
1205
LLVMValueRef scalar;
1206
/* If loading outside the SSBO, we need to skip the load and read 0 instead. */
1208
LLVMValueRef zero = lp_build_zero_bits(gallivm, bit_size);
1209
LLVMValueRef res_store = lp_build_alloca(gallivm, LLVMTypeOf(zero), "");
1210
LLVMBuildStore(builder, zero, res_store);
1212
LLVMValueRef fetch_extent = LLVMBuildAdd(builder, chan_offset, lp_build_const_int32(gallivm, 1), "");
1213
LLVMValueRef fetch_cond = LLVMBuildICmp(gallivm->builder, LLVMIntUGE, ssbo_limit, fetch_extent, "");
1214
LLVMValueRef fetch_cond2 = LLVMBuildICmp(gallivm->builder, LLVMIntSGE, chan_offset, lp_build_const_int32(gallivm, 0), "");
1215
LLVMValueRef fetch_cond_final = LLVMBuildAnd(gallivm->builder, fetch_cond, fetch_cond2, "");
1216
struct lp_build_if_state ifthen;
1217
lp_build_if(&ifthen, gallivm, fetch_cond_final);
1218
LLVMBuildStore(builder, lp_build_pointer_get(builder, mem_ptr, chan_offset), res_store);
1219
lp_build_endif(&ifthen);
1221
scalar = LLVMBuildLoad(builder, res_store, "");
1223
scalar = lp_build_pointer_get(builder, mem_ptr, chan_offset);
1226
outval[c] = lp_build_broadcast_scalar(load_bld, scalar);
1231
/* although the index is dynamically uniform that doesn't count if exec mask isn't set, so read the one-by-one */
1233
LLVMValueRef result[NIR_MAX_VEC_COMPONENTS];
1234
for (unsigned c = 0; c < nc; c++)
1235
result[c] = lp_build_alloca(gallivm, load_bld->vec_type, "");
1237
LLVMValueRef exec_mask = mask_vec(bld_base);
1238
LLVMValueRef cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
1239
struct lp_build_loop_state loop_state;
1240
lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
1241
LLVMValueRef loop_cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
1242
LLVMValueRef loop_offset = LLVMBuildExtractElement(gallivm->builder, offset, loop_state.counter, "");
1244
struct lp_build_if_state exec_ifthen;
1245
lp_build_if(&exec_ifthen, gallivm, loop_cond);
1247
LLVMValueRef ssbo_limit;
1248
LLVMValueRef mem_ptr = mem_access_base_pointer(bld_base, load_bld, bit_size, index,
1249
loop_state.counter, &ssbo_limit);
1251
for (unsigned c = 0; c < nc; c++) {
1252
LLVMValueRef loop_index = LLVMBuildAdd(builder, loop_offset, lp_build_const_int32(gallivm, c), "");
1253
LLVMValueRef do_fetch = lp_build_const_int32(gallivm, -1);
1255
LLVMValueRef ssbo_oob_cmp = lp_build_compare(gallivm, lp_elem_type(uint_bld->type), PIPE_FUNC_LESS, loop_index, ssbo_limit);
1256
do_fetch = LLVMBuildAnd(builder, do_fetch, ssbo_oob_cmp, "");
1259
struct lp_build_if_state ifthen;
1260
LLVMValueRef fetch_cond, temp_res;
1262
fetch_cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, do_fetch, lp_build_const_int32(gallivm, 0), "");
1264
lp_build_if(&ifthen, gallivm, fetch_cond);
1265
LLVMValueRef scalar = lp_build_pointer_get(builder, mem_ptr, loop_index);
1267
temp_res = LLVMBuildLoad(builder, result[c], "");
1268
temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, loop_state.counter, "");
1269
LLVMBuildStore(builder, temp_res, result[c]);
1270
lp_build_else(&ifthen);
1271
temp_res = LLVMBuildLoad(builder, result[c], "");
1272
LLVMValueRef zero = lp_build_zero_bits(gallivm, bit_size);
1273
temp_res = LLVMBuildInsertElement(builder, temp_res, zero, loop_state.counter, "");
1274
LLVMBuildStore(builder, temp_res, result[c]);
1275
lp_build_endif(&ifthen);
1278
lp_build_endif(&exec_ifthen);
1279
lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
1281
for (unsigned c = 0; c < nc; c++)
1282
outval[c] = LLVMBuildLoad(gallivm->builder, result[c], "");
1286
static void emit_store_mem(struct lp_build_nir_context *bld_base,
1291
LLVMValueRef offset,
1294
struct gallivm_state *gallivm = bld_base->base.gallivm;
1295
struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1296
LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
1297
struct lp_build_context *uint_bld = &bld_base->uint_bld;
1298
struct lp_build_context *store_bld;
1299
uint32_t shift_val = bit_size_to_shift_size(bit_size);
1300
store_bld = get_int_bld(bld_base, true, bit_size);
1302
offset = lp_build_shr_imm(uint_bld, offset, shift_val);
1304
LLVMValueRef exec_mask = mask_vec(bld_base);
1305
LLVMValueRef cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
1306
struct lp_build_loop_state loop_state;
1307
lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
1308
LLVMValueRef loop_cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
1309
LLVMValueRef loop_offset = LLVMBuildExtractElement(gallivm->builder, offset, loop_state.counter, "");
1311
struct lp_build_if_state exec_ifthen;
1312
lp_build_if(&exec_ifthen, gallivm, loop_cond);
1314
LLVMValueRef ssbo_limit;
1315
LLVMValueRef mem_ptr = mem_access_base_pointer(bld_base, store_bld, bit_size, index,
1316
loop_state.counter, &ssbo_limit);
1318
for (unsigned c = 0; c < nc; c++) {
1319
if (!(writemask & (1u << c)))
1321
LLVMValueRef loop_index = LLVMBuildAdd(builder, loop_offset, lp_build_const_int32(gallivm, c), "");
1322
LLVMValueRef val = (nc == 1) ? dst : LLVMBuildExtractValue(builder, dst, c, "");
1323
LLVMValueRef do_store = lp_build_const_int32(gallivm, -1);
1326
LLVMValueRef ssbo_oob_cmp = lp_build_compare(gallivm, lp_elem_type(uint_bld->type), PIPE_FUNC_LESS, loop_index, ssbo_limit);
1327
do_store = LLVMBuildAnd(builder, do_store, ssbo_oob_cmp, "");
1330
LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, val,
1331
loop_state.counter, "");
1332
value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, store_bld->elem_type, "");
1333
struct lp_build_if_state ifthen;
1334
LLVMValueRef store_cond;
1336
store_cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, do_store, lp_build_const_int32(gallivm, 0), "");
1337
lp_build_if(&ifthen, gallivm, store_cond);
1338
lp_build_pointer_set(builder, mem_ptr, loop_index, value_ptr);
1339
lp_build_endif(&ifthen);
1342
lp_build_endif(&exec_ifthen);
1343
lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
1348
static void emit_atomic_mem(struct lp_build_nir_context *bld_base,
1349
nir_intrinsic_op nir_op,
1351
LLVMValueRef index, LLVMValueRef offset,
1352
LLVMValueRef val, LLVMValueRef val2,
1353
LLVMValueRef *result)
1355
struct gallivm_state *gallivm = bld_base->base.gallivm;
1356
struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1357
LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
1358
struct lp_build_context *uint_bld = &bld_base->uint_bld;
1359
uint32_t shift_val = bit_size_to_shift_size(bit_size);
1360
struct lp_build_context *atomic_bld = get_int_bld(bld_base, true, bit_size);
1362
offset = lp_build_shr_imm(uint_bld, offset, shift_val);
1363
LLVMValueRef atom_res = lp_build_alloca(gallivm,
1364
atomic_bld->vec_type, "");
1366
LLVMValueRef exec_mask = mask_vec(bld_base);
1367
LLVMValueRef cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
1368
struct lp_build_loop_state loop_state;
1369
lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
1370
LLVMValueRef loop_cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
1371
LLVMValueRef loop_offset = LLVMBuildExtractElement(gallivm->builder, offset, loop_state.counter, "");
1373
struct lp_build_if_state exec_ifthen;
1374
lp_build_if(&exec_ifthen, gallivm, loop_cond);
1376
LLVMValueRef ssbo_limit;
1377
LLVMValueRef mem_ptr = mem_access_base_pointer(bld_base, atomic_bld, bit_size, index,
1378
loop_state.counter, &ssbo_limit);
1380
LLVMValueRef do_fetch = lp_build_const_int32(gallivm, -1);
1382
LLVMValueRef ssbo_oob_cmp = lp_build_compare(gallivm, lp_elem_type(uint_bld->type), PIPE_FUNC_LESS, loop_offset, ssbo_limit);
1383
do_fetch = LLVMBuildAnd(builder, do_fetch, ssbo_oob_cmp, "");
1386
LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, val,
1387
loop_state.counter, "");
1388
value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, atomic_bld->elem_type, "");
1390
LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, mem_ptr, &loop_offset, 1, "");
1392
struct lp_build_if_state ifthen;
1393
LLVMValueRef inner_cond, temp_res;
1394
LLVMValueRef scalar;
1396
inner_cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, do_fetch, lp_build_const_int32(gallivm, 0), "");
1397
lp_build_if(&ifthen, gallivm, inner_cond);
1399
if (nir_op == nir_intrinsic_ssbo_atomic_comp_swap || nir_op == nir_intrinsic_shared_atomic_comp_swap) {
1400
LLVMValueRef cas_src_ptr = LLVMBuildExtractElement(gallivm->builder, val2,
1401
loop_state.counter, "");
1402
cas_src_ptr = LLVMBuildBitCast(gallivm->builder, cas_src_ptr, atomic_bld->elem_type, "");
1403
scalar = LLVMBuildAtomicCmpXchg(builder, scalar_ptr, value_ptr,
1405
LLVMAtomicOrderingSequentiallyConsistent,
1406
LLVMAtomicOrderingSequentiallyConsistent,
1408
scalar = LLVMBuildExtractValue(gallivm->builder, scalar, 0, "");
1410
LLVMAtomicRMWBinOp op;
1413
case nir_intrinsic_shared_atomic_add:
1414
case nir_intrinsic_ssbo_atomic_add:
1415
op = LLVMAtomicRMWBinOpAdd;
1417
case nir_intrinsic_shared_atomic_exchange:
1418
case nir_intrinsic_ssbo_atomic_exchange:
1419
op = LLVMAtomicRMWBinOpXchg;
1421
case nir_intrinsic_shared_atomic_and:
1422
case nir_intrinsic_ssbo_atomic_and:
1423
op = LLVMAtomicRMWBinOpAnd;
1425
case nir_intrinsic_shared_atomic_or:
1426
case nir_intrinsic_ssbo_atomic_or:
1427
op = LLVMAtomicRMWBinOpOr;
1429
case nir_intrinsic_shared_atomic_xor:
1430
case nir_intrinsic_ssbo_atomic_xor:
1431
op = LLVMAtomicRMWBinOpXor;
1433
case nir_intrinsic_shared_atomic_umin:
1434
case nir_intrinsic_ssbo_atomic_umin:
1435
op = LLVMAtomicRMWBinOpUMin;
1437
case nir_intrinsic_shared_atomic_umax:
1438
case nir_intrinsic_ssbo_atomic_umax:
1439
op = LLVMAtomicRMWBinOpUMax;
1441
case nir_intrinsic_ssbo_atomic_imin:
1442
case nir_intrinsic_shared_atomic_imin:
1443
op = LLVMAtomicRMWBinOpMin;
1445
case nir_intrinsic_ssbo_atomic_imax:
1446
case nir_intrinsic_shared_atomic_imax:
1447
op = LLVMAtomicRMWBinOpMax;
1450
unreachable("unknown atomic op");
1452
scalar = LLVMBuildAtomicRMW(builder, op,
1453
scalar_ptr, value_ptr,
1454
LLVMAtomicOrderingSequentiallyConsistent,
1457
temp_res = LLVMBuildLoad(builder, atom_res, "");
1458
temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, loop_state.counter, "");
1459
LLVMBuildStore(builder, temp_res, atom_res);
1460
lp_build_else(&ifthen);
1461
temp_res = LLVMBuildLoad(builder, atom_res, "");
1462
LLVMValueRef zero = lp_build_zero_bits(gallivm, bit_size);
1463
temp_res = LLVMBuildInsertElement(builder, temp_res, zero, loop_state.counter, "");
1464
LLVMBuildStore(builder, temp_res, atom_res);
1465
lp_build_endif(&ifthen);
1467
lp_build_endif(&exec_ifthen);
1468
lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
1470
*result = LLVMBuildLoad(builder, atom_res, "");
1473
static void emit_barrier(struct lp_build_nir_context *bld_base)
1475
struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1476
struct gallivm_state * gallivm = bld_base->base.gallivm;
1478
LLVMBasicBlockRef resume = lp_build_insert_new_block(gallivm, "resume");
1480
lp_build_coro_suspend_switch(gallivm, bld->coro, resume, false);
1481
LLVMPositionBuilderAtEnd(gallivm->builder, resume);
1484
/* Return the size of the SSBO selected by lane 0 of @index, broadcast to a
 * full uint vector. Note this assumes the index is dynamically uniform.
 */
static LLVMValueRef emit_get_ssbo_size(struct lp_build_nir_context *bld_base,
                                       LLVMValueRef index)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   struct lp_build_context *bld_broad = &bld_base->uint_bld;
   LLVMValueRef size_ptr = lp_build_array_get(bld_base->base.gallivm, bld->ssbo_sizes_ptr,
                                              LLVMBuildExtractElement(builder, index, lp_build_const_int32(gallivm, 0), ""));
   return lp_build_broadcast_scalar(bld_broad, size_ptr);
}
static void emit_image_op(struct lp_build_nir_context *bld_base,
1497
struct lp_img_params *params)
1499
struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1500
struct gallivm_state *gallivm = bld_base->base.gallivm;
1502
params->type = bld_base->base.type;
1503
params->context_ptr = bld->context_ptr;
1504
params->thread_data_ptr = bld->thread_data_ptr;
1505
params->exec_mask = mask_vec(bld_base);
1507
if (params->image_index_offset)
1508
params->image_index_offset = LLVMBuildExtractElement(gallivm->builder, params->image_index_offset,
1509
lp_build_const_int32(gallivm, 0), "");
1511
bld->image->emit_op(bld->image,
1512
bld->bld_base.base.gallivm,
1517
static void emit_image_size(struct lp_build_nir_context *bld_base,
1518
struct lp_sampler_size_query_params *params)
1520
struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1521
struct gallivm_state *gallivm = bld_base->base.gallivm;
1523
params->int_type = bld_base->int_bld.type;
1524
params->context_ptr = bld->context_ptr;
1526
if (params->texture_unit_offset)
1527
params->texture_unit_offset = LLVMBuildExtractElement(gallivm->builder, params->texture_unit_offset,
1528
lp_build_const_int32(gallivm, 0), "");
1529
bld->image->emit_size_query(bld->image,
1530
bld->bld_base.base.gallivm,
1535
static void init_var_slots(struct lp_build_nir_context *bld_base,
1536
nir_variable *var, unsigned sc)
1538
struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1539
unsigned slots = glsl_count_attribute_slots(var->type, false) * 4;
1543
for (unsigned comp = sc; comp < slots + sc; comp++) {
1544
unsigned this_loc = var->data.driver_location + (comp / 4);
1545
unsigned this_chan = comp % 4;
1547
if (!bld->outputs[this_loc][this_chan])
1548
bld->outputs[this_loc][this_chan] = lp_build_alloca(bld_base->base.gallivm,
1549
bld_base->base.vec_type, "output");
1553
static void emit_var_decl(struct lp_build_nir_context *bld_base,
1556
unsigned sc = var->data.location_frac;
1557
switch (var->data.mode) {
1558
case nir_var_shader_out: {
1559
if (bld_base->shader->info.stage == MESA_SHADER_FRAGMENT) {
1560
if (var->data.location == FRAG_RESULT_STENCIL)
1562
else if (var->data.location == FRAG_RESULT_DEPTH)
1565
init_var_slots(bld_base, var, sc);
1573
static void emit_tex(struct lp_build_nir_context *bld_base,
1574
struct lp_sampler_params *params)
1576
struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1577
struct gallivm_state *gallivm = bld_base->base.gallivm;
1578
LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1580
params->type = bld_base->base.type;
1581
params->context_ptr = bld->context_ptr;
1582
params->thread_data_ptr = bld->thread_data_ptr;
1584
if (params->texture_index_offset && bld_base->shader->info.stage != MESA_SHADER_FRAGMENT) {
1585
/* this is horrible but this can be dynamic */
1586
LLVMValueRef coords[5];
1587
LLVMValueRef *orig_texel_ptr;
1588
struct lp_build_context *uint_bld = &bld_base->uint_bld;
1589
LLVMValueRef result[4] = { LLVMGetUndef(bld_base->base.vec_type),
1590
LLVMGetUndef(bld_base->base.vec_type),
1591
LLVMGetUndef(bld_base->base.vec_type),
1592
LLVMGetUndef(bld_base->base.vec_type) };
1593
LLVMValueRef texel[4], orig_offset, orig_lod;
1595
orig_texel_ptr = params->texel;
1596
orig_lod = params->lod;
1597
for (i = 0; i < 5; i++) {
1598
coords[i] = params->coords[i];
1600
orig_offset = params->texture_index_offset;
1602
for (unsigned v = 0; v < uint_bld->type.length; v++) {
1603
LLVMValueRef idx = lp_build_const_int32(gallivm, v);
1604
LLVMValueRef new_coords[5];
1605
for (i = 0; i < 5; i++) {
1606
new_coords[i] = LLVMBuildExtractElement(gallivm->builder,
1607
coords[i], idx, "");
1609
params->coords = new_coords;
1610
params->texture_index_offset = LLVMBuildExtractElement(gallivm->builder,
1613
params->type = lp_elem_type(bld_base->base.type);
1616
params->lod = LLVMBuildExtractElement(gallivm->builder, orig_lod, idx, "");
1617
params->texel = texel;
1618
bld->sampler->emit_tex_sample(bld->sampler,
1622
for (i = 0; i < 4; i++) {
1623
result[i] = LLVMBuildInsertElement(gallivm->builder, result[i], texel[i], idx, "");
1626
for (i = 0; i < 4; i++) {
1627
orig_texel_ptr[i] = result[i];
1632
if (params->texture_index_offset) {
1633
struct lp_build_loop_state loop_state;
1634
LLVMValueRef exec_mask = mask_vec(bld_base);
1635
LLVMValueRef outer_cond = LLVMBuildICmp(builder, LLVMIntNE, exec_mask, bld_base->uint_bld.zero, "");
1636
LLVMValueRef res_store = lp_build_alloca(gallivm, bld_base->uint_bld.elem_type, "");
1637
lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
1638
LLVMValueRef if_cond = LLVMBuildExtractElement(gallivm->builder, outer_cond, loop_state.counter, "");
1640
struct lp_build_if_state ifthen;
1641
lp_build_if(&ifthen, gallivm, if_cond);
1642
LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, params->texture_index_offset,
1643
loop_state.counter, "");
1644
LLVMBuildStore(builder, value_ptr, res_store);
1645
lp_build_endif(&ifthen);
1646
lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, bld_base->uint_bld.type.length),
1648
LLVMValueRef idx_val = LLVMBuildLoad(builder, res_store, "");
1649
params->texture_index_offset = idx_val;
1652
params->type = bld_base->base.type;
1653
bld->sampler->emit_tex_sample(bld->sampler,
1654
bld->bld_base.base.gallivm,
1658
static void emit_tex_size(struct lp_build_nir_context *bld_base,
1659
struct lp_sampler_size_query_params *params)
1661
struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1663
params->int_type = bld_base->int_bld.type;
1664
params->context_ptr = bld->context_ptr;
1666
if (params->texture_unit_offset)
1667
params->texture_unit_offset = LLVMBuildExtractElement(bld_base->base.gallivm->builder,
1668
params->texture_unit_offset,
1669
lp_build_const_int32(bld_base->base.gallivm, 0), "");
1670
bld->sampler->emit_size_query(bld->sampler,
1671
bld->bld_base.base.gallivm,
1675
static void emit_sysval_intrin(struct lp_build_nir_context *bld_base,
1676
nir_intrinsic_instr *instr,
1677
LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
1679
struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1680
struct gallivm_state *gallivm = bld_base->base.gallivm;
1681
struct lp_build_context *bld_broad = get_int_bld(bld_base, true, instr->dest.ssa.bit_size);
1682
switch (instr->intrinsic) {
1683
case nir_intrinsic_load_instance_id:
1684
result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.instance_id);
1686
case nir_intrinsic_load_base_instance:
1687
result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.base_instance);
1689
case nir_intrinsic_load_base_vertex:
1690
result[0] = bld->system_values.basevertex;
1692
case nir_intrinsic_load_first_vertex:
1693
result[0] = bld->system_values.firstvertex;
1695
case nir_intrinsic_load_vertex_id:
1696
result[0] = bld->system_values.vertex_id;
1698
case nir_intrinsic_load_primitive_id:
1699
result[0] = bld->system_values.prim_id;
1701
case nir_intrinsic_load_workgroup_id: {
1702
LLVMValueRef tmp[3];
1703
for (unsigned i = 0; i < 3; i++) {
1704
tmp[i] = LLVMBuildExtractElement(gallivm->builder, bld->system_values.block_id, lp_build_const_int32(gallivm, i), "");
1705
if (instr->dest.ssa.bit_size == 64)
1706
tmp[i] = LLVMBuildZExt(gallivm->builder, tmp[i], bld_base->uint64_bld.elem_type, "");
1707
result[i] = lp_build_broadcast_scalar(bld_broad, tmp[i]);
1711
case nir_intrinsic_load_local_invocation_id:
1712
for (unsigned i = 0; i < 3; i++)
1713
result[i] = LLVMBuildExtractValue(gallivm->builder, bld->system_values.thread_id, i, "");
1715
case nir_intrinsic_load_local_invocation_index: {
1716
LLVMValueRef tmp, tmp2;
1717
tmp = lp_build_broadcast_scalar(&bld_base->uint_bld, LLVMBuildExtractElement(gallivm->builder, bld->system_values.block_size, lp_build_const_int32(gallivm, 1), ""));
1718
tmp2 = lp_build_broadcast_scalar(&bld_base->uint_bld, LLVMBuildExtractElement(gallivm->builder, bld->system_values.block_size, lp_build_const_int32(gallivm, 0), ""));
1719
tmp = lp_build_mul(&bld_base->uint_bld, tmp, tmp2);
1720
tmp = lp_build_mul(&bld_base->uint_bld, tmp, LLVMBuildExtractValue(gallivm->builder, bld->system_values.thread_id, 2, ""));
1722
tmp2 = lp_build_broadcast_scalar(&bld_base->uint_bld, LLVMBuildExtractElement(gallivm->builder, bld->system_values.block_size, lp_build_const_int32(gallivm, 0), ""));
1723
tmp2 = lp_build_mul(&bld_base->uint_bld, tmp2, LLVMBuildExtractValue(gallivm->builder, bld->system_values.thread_id, 1, ""));
1724
tmp = lp_build_add(&bld_base->uint_bld, tmp, tmp2);
1725
tmp = lp_build_add(&bld_base->uint_bld, tmp, LLVMBuildExtractValue(gallivm->builder, bld->system_values.thread_id, 0, ""));
1729
case nir_intrinsic_load_num_workgroups: {
1730
LLVMValueRef tmp[3];
1731
for (unsigned i = 0; i < 3; i++) {
1732
tmp[i] = LLVMBuildExtractElement(gallivm->builder, bld->system_values.grid_size, lp_build_const_int32(gallivm, i), "");
1733
if (instr->dest.ssa.bit_size == 64)
1734
tmp[i] = LLVMBuildZExt(gallivm->builder, tmp[i], bld_base->uint64_bld.elem_type, "");
1735
result[i] = lp_build_broadcast_scalar(bld_broad, tmp[i]);
1739
case nir_intrinsic_load_invocation_id:
1740
if (bld_base->shader->info.stage == MESA_SHADER_TESS_CTRL)
1741
result[0] = bld->system_values.invocation_id;
1743
result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.invocation_id);
1745
case nir_intrinsic_load_front_face:
1746
result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.front_facing);
1748
case nir_intrinsic_load_draw_id:
1749
result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.draw_id);
1753
case nir_intrinsic_load_workgroup_size:
1754
for (unsigned i = 0; i < 3; i++)
1755
result[i] = lp_build_broadcast_scalar(&bld_base->uint_bld, LLVMBuildExtractElement(gallivm->builder, bld->system_values.block_size, lp_build_const_int32(gallivm, i), ""));
1757
case nir_intrinsic_load_work_dim:
1758
result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.work_dim);
1760
case nir_intrinsic_load_tess_coord:
1761
for (unsigned i = 0; i < 3; i++) {
1762
result[i] = LLVMBuildExtractValue(gallivm->builder, bld->system_values.tess_coord, i, "");
1765
case nir_intrinsic_load_tess_level_outer:
1766
for (unsigned i = 0; i < 4; i++)
1767
result[i] = lp_build_broadcast_scalar(&bld_base->base, LLVMBuildExtractValue(gallivm->builder, bld->system_values.tess_outer, i, ""));
1769
case nir_intrinsic_load_tess_level_inner:
1770
for (unsigned i = 0; i < 2; i++)
1771
result[i] = lp_build_broadcast_scalar(&bld_base->base, LLVMBuildExtractValue(gallivm->builder, bld->system_values.tess_inner, i, ""));
1773
case nir_intrinsic_load_patch_vertices_in:
1774
result[0] = bld->system_values.vertices_in;
1776
case nir_intrinsic_load_sample_id:
1777
result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.sample_id);
1779
case nir_intrinsic_load_sample_pos:
1780
for (unsigned i = 0; i < 2; i++) {
1781
LLVMValueRef idx = LLVMBuildMul(gallivm->builder, bld->system_values.sample_id, lp_build_const_int32(gallivm, 2), "");
1782
idx = LLVMBuildAdd(gallivm->builder, idx, lp_build_const_int32(gallivm, i), "");
1783
LLVMValueRef val = lp_build_array_get(gallivm, bld->system_values.sample_pos, idx);
1784
result[i] = lp_build_broadcast_scalar(&bld_base->base, val);
1787
case nir_intrinsic_load_sample_mask_in:
1788
result[0] = bld->system_values.sample_mask_in;
1790
case nir_intrinsic_load_view_index:
1791
result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.view_index);
1793
case nir_intrinsic_load_subgroup_invocation: {
1794
LLVMValueRef elems[LP_MAX_VECTOR_LENGTH];
1795
for(unsigned i = 0; i < bld->bld_base.base.type.length; ++i)
1796
elems[i] = lp_build_const_int32(gallivm, i);
1797
result[0] = LLVMConstVector(elems, bld->bld_base.base.type.length);
1800
case nir_intrinsic_load_subgroup_id:
1801
result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.subgroup_id);
1803
case nir_intrinsic_load_num_subgroups:
1804
result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.num_subgroups);
1809
static void emit_helper_invocation(struct lp_build_nir_context *bld_base,
1812
struct gallivm_state *gallivm = bld_base->base.gallivm;
1813
struct lp_build_context *uint_bld = &bld_base->uint_bld;
1814
*dst = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL, mask_vec(bld_base), lp_build_const_int_vec(gallivm, uint_bld->type, -1));
1817
static void bgnloop(struct lp_build_nir_context *bld_base)
1819
struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1820
lp_exec_bgnloop(&bld->exec_mask, true);
1823
static void endloop(struct lp_build_nir_context *bld_base)
1825
struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1826
lp_exec_endloop(bld_base->base.gallivm, &bld->exec_mask);
1829
/*
 * Begin an if-statement: push the (bitcast-to-int) condition vector as a
 * new conditional level on the execution mask.
 */
static void if_cond(struct lp_build_nir_context *bld_base, LLVMValueRef cond)
{
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
   struct lp_build_nir_soa_context *bld =
      (struct lp_build_nir_soa_context *)bld_base;
   LLVMValueRef icond =
      LLVMBuildBitCast(builder, cond, bld_base->base.int_vec_type, "");
   lp_exec_mask_cond_push(&bld->exec_mask, icond);
}
static void else_stmt(struct lp_build_nir_context *bld_base)
1838
struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1839
lp_exec_mask_cond_invert(&bld->exec_mask);
1842
static void endif_stmt(struct lp_build_nir_context *bld_base)
1844
struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1845
lp_exec_mask_cond_pop(&bld->exec_mask);
1848
static void break_stmt(struct lp_build_nir_context *bld_base)
1850
struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1852
lp_exec_break(&bld->exec_mask, NULL, false);
1855
static void continue_stmt(struct lp_build_nir_context *bld_base)
1857
struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1858
lp_exec_continue(&bld->exec_mask);
1861
/*
 * Kill fragments. With a NULL cond this is an unconditional discard of all
 * currently-active lanes; otherwise lanes where cond is true are killed.
 * The fragment mask convention is "1 = alive", so the value ANDed into
 * bld->mask must be the NOT of the kill condition.
 * NOTE(review): the if/else skeleton between the two visible arms was lost
 * in this chunk and has been reconstructed — confirm against upstream.
 */
static void discard(struct lp_build_nir_context *bld_base, LLVMValueRef cond)
{
   struct lp_build_nir_soa_context *bld =
      (struct lp_build_nir_soa_context *)bld_base;
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   LLVMValueRef mask;

   if (!cond) {
      /* unconditional discard: kill whatever is currently active */
      if (bld->exec_mask.has_mask) {
         mask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
      } else {
         mask = LLVMConstNull(bld->bld_base.base.int_vec_type);
      }
   } else {
      mask = LLVMBuildNot(builder, cond, "");
      if (bld->exec_mask.has_mask) {
         /* inactive lanes must stay alive regardless of cond */
         LLVMValueRef invmask;
         invmask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
         mask = LLVMBuildOr(builder, mask, invmask, "");
      }
   }
   lp_build_mask_update(bld->mask, mask);
}
increment_vec_ptr_by_mask(struct lp_build_nir_context * bld_base,
1889
LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1890
LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, "");
1892
current_vec = LLVMBuildSub(builder, current_vec, mask, "");
1894
LLVMBuildStore(builder, current_vec, ptr);
1898
clear_uint_vec_ptr_from_mask(struct lp_build_nir_context * bld_base,
1902
LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1903
LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, "");
1905
current_vec = lp_build_select(&bld_base->uint_bld,
1907
bld_base->uint_bld.zero,
1910
LLVMBuildStore(builder, current_vec, ptr);
1914
clamp_mask_to_max_output_vertices(struct lp_build_nir_soa_context * bld,
1915
LLVMValueRef current_mask_vec,
1916
LLVMValueRef total_emitted_vertices_vec)
1918
LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
1919
struct lp_build_context *int_bld = &bld->bld_base.int_bld;
1920
LLVMValueRef max_mask = lp_build_cmp(int_bld, PIPE_FUNC_LESS,
1921
total_emitted_vertices_vec,
1922
bld->max_output_vertices_vec);
1924
return LLVMBuildAnd(builder, current_mask_vec, max_mask, "");
1927
/*
 * EmitVertex for geometry shaders: hand the current outputs to the GS
 * interface for all active lanes (clamped so no lane exceeds the declared
 * vertices_out), then bump the per-stream vertex counters.
 * NOTE(review): two argument lines of the emit_vertex callback were lost in
 * this chunk (outputs and mask) and have been reconstructed — confirm.
 */
static void emit_vertex(struct lp_build_nir_context *bld_base, uint32_t stream_id)
{
   struct lp_build_nir_soa_context *bld =
      (struct lp_build_nir_soa_context *)bld_base;
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;

   if (stream_id >= bld->gs_vertex_streams)
      return;
   assert(bld->gs_iface->emit_vertex);

   LLVMValueRef total_emitted_vertices_vec =
      LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr[stream_id], "");
   LLVMValueRef mask = mask_vec(bld_base);
   mask = clamp_mask_to_max_output_vertices(bld, mask,
                                            total_emitted_vertices_vec);
   bld->gs_iface->emit_vertex(bld->gs_iface, &bld->bld_base.base,
                              bld->outputs,
                              total_emitted_vertices_vec,
                              mask,
                              lp_build_const_int_vec(bld->bld_base.base.gallivm,
                                                     bld->bld_base.base.type,
                                                     stream_id));

   increment_vec_ptr_by_mask(bld_base, bld->emitted_vertices_vec_ptr[stream_id],
                             mask);
   increment_vec_ptr_by_mask(bld_base, bld->total_emitted_vertices_vec_ptr[stream_id],
                             mask);
}
end_primitive_masked(struct lp_build_nir_context * bld_base,
1954
LLVMValueRef mask, uint32_t stream_id)
1956
struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1957
LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
1959
if (stream_id >= bld->gs_vertex_streams)
1961
struct lp_build_context *uint_bld = &bld_base->uint_bld;
1962
LLVMValueRef emitted_vertices_vec =
1963
LLVMBuildLoad(builder, bld->emitted_vertices_vec_ptr[stream_id], "");
1964
LLVMValueRef emitted_prims_vec =
1965
LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr[stream_id], "");
1966
LLVMValueRef total_emitted_vertices_vec =
1967
LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr[stream_id], "");
1969
LLVMValueRef emitted_mask = lp_build_cmp(uint_bld,
1971
emitted_vertices_vec,
1973
mask = LLVMBuildAnd(builder, mask, emitted_mask, "");
1974
bld->gs_iface->end_primitive(bld->gs_iface, &bld->bld_base.base,
1975
total_emitted_vertices_vec,
1976
emitted_vertices_vec, emitted_prims_vec, mask, stream_id);
1977
increment_vec_ptr_by_mask(bld_base, bld->emitted_prims_vec_ptr[stream_id],
1979
clear_uint_vec_ptr_from_mask(bld_base, bld->emitted_vertices_vec_ptr[stream_id],
1983
/* EndPrimitive with the current execution mask. */
static void end_primitive(struct lp_build_nir_context *bld_base, uint32_t stream_id)
{
   ASSERTED struct lp_build_nir_soa_context *bld =
      (struct lp_build_nir_soa_context *)bld_base;

   assert(bld->gs_iface->end_primitive);

   LLVMValueRef mask = mask_vec(bld_base);
   end_primitive_masked(bld_base, mask, stream_id);
}
emit_prologue(struct lp_build_nir_soa_context *bld)
1996
struct gallivm_state * gallivm = bld->bld_base.base.gallivm;
1997
if (bld->indirects & nir_var_shader_in && !bld->gs_iface && !bld->tcs_iface && !bld->tes_iface) {
1998
uint32_t num_inputs = util_bitcount64(bld->bld_base.shader->info.inputs_read);
1999
unsigned index, chan;
2000
LLVMTypeRef vec_type = bld->bld_base.base.vec_type;
2001
LLVMValueRef array_size = lp_build_const_int32(gallivm, num_inputs * 4);
2002
bld->inputs_array = lp_build_array_alloca(gallivm,
2003
vec_type, array_size,
2006
for (index = 0; index < num_inputs; ++index) {
2007
for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
2008
LLVMValueRef lindex =
2009
lp_build_const_int32(gallivm, index * 4 + chan);
2010
LLVMValueRef input_ptr =
2011
LLVMBuildGEP(gallivm->builder, bld->inputs_array,
2013
LLVMValueRef value = bld->inputs[index][chan];
2015
LLVMBuildStore(gallivm->builder, value, input_ptr);
2021
static void emit_vote(struct lp_build_nir_context *bld_base, LLVMValueRef src,
2022
nir_intrinsic_instr *instr, LLVMValueRef result[4])
2024
struct gallivm_state * gallivm = bld_base->base.gallivm;
2025
LLVMBuilderRef builder = gallivm->builder;
2026
uint32_t bit_size = nir_src_bit_size(instr->src[0]);
2027
LLVMValueRef exec_mask = mask_vec(bld_base);
2028
struct lp_build_loop_state loop_state;
2029
LLVMValueRef outer_cond = LLVMBuildICmp(builder, LLVMIntNE, exec_mask, bld_base->uint_bld.zero, "");
2031
LLVMValueRef res_store = lp_build_alloca(gallivm, bld_base->uint_bld.elem_type, "");
2032
LLVMValueRef eq_store = lp_build_alloca(gallivm, get_int_bld(bld_base, true, bit_size)->elem_type, "");
2033
LLVMValueRef init_val = NULL;
2034
if (instr->intrinsic == nir_intrinsic_vote_ieq ||
2035
instr->intrinsic == nir_intrinsic_vote_feq) {
2036
/* for equal we unfortunately have to loop and find the first valid one. */
2037
lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
2038
LLVMValueRef if_cond = LLVMBuildExtractElement(gallivm->builder, outer_cond, loop_state.counter, "");
2040
struct lp_build_if_state ifthen;
2041
lp_build_if(&ifthen, gallivm, if_cond);
2042
LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, src,
2043
loop_state.counter, "");
2044
LLVMBuildStore(builder, value_ptr, eq_store);
2045
LLVMBuildStore(builder, lp_build_const_int32(gallivm, -1), res_store);
2046
lp_build_endif(&ifthen);
2047
lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, bld_base->uint_bld.type.length),
2049
init_val = LLVMBuildLoad(builder, eq_store, "");
2051
LLVMBuildStore(builder, lp_build_const_int32(gallivm, instr->intrinsic == nir_intrinsic_vote_any ? 0 : -1), res_store);
2055
lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
2056
LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, src,
2057
loop_state.counter, "");
2058
struct lp_build_if_state ifthen;
2059
LLVMValueRef if_cond;
2060
if_cond = LLVMBuildExtractElement(gallivm->builder, outer_cond, loop_state.counter, "");
2062
lp_build_if(&ifthen, gallivm, if_cond);
2063
res = LLVMBuildLoad(builder, res_store, "");
2065
if (instr->intrinsic == nir_intrinsic_vote_feq) {
2066
struct lp_build_context *flt_bld = get_flt_bld(bld_base, bit_size);
2067
LLVMValueRef tmp = LLVMBuildFCmp(builder, LLVMRealUEQ,
2068
LLVMBuildBitCast(builder, init_val, flt_bld->elem_type, ""),
2069
LLVMBuildBitCast(builder, value_ptr, flt_bld->elem_type, ""), "");
2070
tmp = LLVMBuildSExt(builder, tmp, bld_base->uint_bld.elem_type, "");
2071
res = LLVMBuildAnd(builder, res, tmp, "");
2072
} else if (instr->intrinsic == nir_intrinsic_vote_ieq) {
2073
LLVMValueRef tmp = LLVMBuildICmp(builder, LLVMIntEQ, init_val, value_ptr, "");
2074
tmp = LLVMBuildSExt(builder, tmp, bld_base->uint_bld.elem_type, "");
2075
res = LLVMBuildAnd(builder, res, tmp, "");
2076
} else if (instr->intrinsic == nir_intrinsic_vote_any)
2077
res = LLVMBuildOr(builder, res, value_ptr, "");
2079
res = LLVMBuildAnd(builder, res, value_ptr, "");
2080
LLVMBuildStore(builder, res, res_store);
2081
lp_build_endif(&ifthen);
2082
lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, bld_base->uint_bld.type.length),
2084
result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, LLVMBuildLoad(builder, res_store, ""));
2087
/*
 * ballot: build a scalar bitmask with one bit per active lane whose src
 * value is true, then broadcast it to every lane of result[0].
 */
static void emit_ballot(struct lp_build_nir_context *bld_base, LLVMValueRef src,
                        nir_intrinsic_instr *instr, LLVMValueRef result[4])
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef exec_mask = mask_vec(bld_base);
   struct lp_build_loop_state loop_state;

   /* inactive lanes contribute nothing */
   src = LLVMBuildAnd(builder, src, exec_mask, "");
   LLVMValueRef res_store = lp_build_alloca(gallivm, bld_base->int_bld.elem_type, "");
   LLVMValueRef res;
   lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
   LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, src,
                                                    loop_state.counter, "");
   res = LLVMBuildLoad(builder, res_store, "");
   /* set bit `counter` when the lane voted true */
   res = LLVMBuildOr(builder,
                     res,
                     LLVMBuildAnd(builder, value_ptr,
                                  LLVMBuildShl(builder, lp_build_const_int32(gallivm, 1),
                                               loop_state.counter, ""), ""), "");
   LLVMBuildStore(builder, res, res_store);
   lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, bld_base->uint_bld.type.length),
                          NULL, LLVMIntUGE);
   result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld,
                                         LLVMBuildLoad(builder, res_store, ""));
}
/*
 * elect: pick the first active lane (exec mask element == -1) and produce
 * a vector that is true (-1) only in that lane.
 */
static void emit_elect(struct lp_build_nir_context *bld_base, LLVMValueRef result[4])
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef exec_mask = mask_vec(bld_base);
   struct lp_build_loop_state loop_state;

   LLVMValueRef idx_store = lp_build_alloca(gallivm, bld_base->int_bld.elem_type, "");
   LLVMValueRef found_store = lp_build_alloca(gallivm, bld_base->int_bld.elem_type, "");
   lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
   LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, exec_mask,
                                                    loop_state.counter, "");
   /* lane is active... */
   LLVMValueRef cond = LLVMBuildICmp(gallivm->builder,
                                     LLVMIntEQ,
                                     value_ptr,
                                     lp_build_const_int32(gallivm, -1), "");
   /* ...and no earlier lane was already elected */
   LLVMValueRef cond2 = LLVMBuildICmp(gallivm->builder,
                                      LLVMIntEQ,
                                      LLVMBuildLoad(builder, found_store, ""),
                                      lp_build_const_int32(gallivm, 0), "");
   cond = LLVMBuildAnd(builder, cond, cond2, "");
   struct lp_build_if_state ifthen;
   lp_build_if(&ifthen, gallivm, cond);
   LLVMBuildStore(builder, lp_build_const_int32(gallivm, 1), found_store);
   LLVMBuildStore(builder, loop_state.counter, idx_store);
   lp_build_endif(&ifthen);
   lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, bld_base->uint_bld.type.length),
                          NULL, LLVMIntUGE);

   result[0] = LLVMBuildInsertElement(builder, bld_base->uint_bld.zero,
                                      lp_build_const_int32(gallivm, -1),
                                      LLVMBuildLoad(builder, idx_store, ""),
                                      "");
}
#if LLVM_VERSION_MAJOR >= 10
/*
 * nir_intrinsic_shuffle: permute lanes of src by the per-lane index vector.
 * Three strategies: a constant index becomes a plain shufflevector; the
 * 8x32 AVX2 case maps to vpermd; otherwise fall back to a scalar
 * extract/insert loop. LLVMBuildFreeze requires LLVM >= 10, hence the guard.
 */
static void emit_shuffle(struct lp_build_nir_context *bld_base, LLVMValueRef src, LLVMValueRef index,
                         nir_intrinsic_instr *instr, LLVMValueRef result[4])
{
   assert(instr->intrinsic == nir_intrinsic_shuffle);

   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   uint32_t bit_size = nir_src_bit_size(instr->src[0]);
   uint32_t index_bit_size = nir_src_bit_size(instr->src[1]);
   struct lp_build_context *int_bld = get_int_bld(bld_base, true, bit_size);

   bool index_is_constant_data = LLVMIsAConstantAggregateZero(index) ||
                                 LLVMIsAConstantDataSequential(index) ||
                                 LLVMIsAUndefValue(index);

   if (index_is_constant_data) {
      /* freeze `src` in case inactive invocations contain poison */
      src = LLVMBuildFreeze(builder, src, "");
      result[0] = LLVMBuildShuffleVector(builder, src,
                                         LLVMGetUndef(LLVMTypeOf(src)), index, "");
   } else if (util_get_cpu_caps()->has_avx2 && bit_size == 32 &&
              index_bit_size == 32 && int_bld->type.length == 8) {
      /* freeze `src` in case inactive invocations contain poison */
      src = LLVMBuildFreeze(builder, src, "");
      result[0] = lp_build_intrinsic_binary(builder, "llvm.x86.avx2.permd",
                                            int_bld->vec_type, src, index);
   } else {
      LLVMValueRef res_store = lp_build_alloca(gallivm, int_bld->vec_type, "");
      struct lp_build_loop_state loop_state;
      lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));

      LLVMValueRef index_value = LLVMBuildExtractElement(builder, index, loop_state.counter, "");

      LLVMValueRef src_value = LLVMBuildExtractElement(builder, src, index_value, "");
      /* freeze `src_value` in case an out-of-bounds index or an index into an
       * inactive invocation results in poison
       */
      src_value = LLVMBuildFreeze(builder, src_value, "");

      LLVMValueRef res = LLVMBuildLoad(builder, res_store, "");
      res = LLVMBuildInsertElement(builder, res, src_value, loop_state.counter, "");
      LLVMBuildStore(builder, res, res_store);

      lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, bld_base->uint_bld.type.length),
                             NULL, LLVMIntUGE);

      result[0] = LLVMBuildLoad(builder, res_store, "");
   }
}
#endif
static void emit_reduce(struct lp_build_nir_context *bld_base, LLVMValueRef src,
2194
nir_intrinsic_instr *instr, LLVMValueRef result[4])
2196
struct gallivm_state *gallivm = bld_base->base.gallivm;
2197
LLVMBuilderRef builder = gallivm->builder;
2198
uint32_t bit_size = nir_src_bit_size(instr->src[0]);
2199
/* can't use llvm reduction intrinsics because of exec_mask */
2200
LLVMValueRef exec_mask = mask_vec(bld_base);
2201
struct lp_build_loop_state loop_state;
2202
nir_op reduction_op = nir_intrinsic_reduction_op(instr);
2204
LLVMValueRef res_store = NULL;
2205
LLVMValueRef scan_store;
2206
struct lp_build_context *int_bld = get_int_bld(bld_base, true, bit_size);
2208
if (instr->intrinsic != nir_intrinsic_reduce)
2209
res_store = lp_build_alloca(gallivm, int_bld->vec_type, "");
2211
scan_store = lp_build_alloca(gallivm, int_bld->elem_type, "");
2213
struct lp_build_context elem_bld;
2214
bool is_flt = reduction_op == nir_op_fadd ||
2215
reduction_op == nir_op_fmul ||
2216
reduction_op == nir_op_fmin ||
2217
reduction_op == nir_op_fmax;
2218
bool is_unsigned = reduction_op == nir_op_umin ||
2219
reduction_op == nir_op_umax;
2221
struct lp_build_context *vec_bld = is_flt ? get_flt_bld(bld_base, bit_size) :
2222
get_int_bld(bld_base, is_unsigned, bit_size);
2224
lp_build_context_init(&elem_bld, gallivm, lp_elem_type(vec_bld->type));
2226
LLVMValueRef store_val = NULL;
2228
* Put the identity value for the operation into the storage
2230
switch (reduction_op) {
2232
LLVMValueRef flt_max = bit_size == 64 ? LLVMConstReal(LLVMDoubleTypeInContext(gallivm->context), INFINITY) :
2233
(bit_size == 16 ? LLVMConstReal(LLVMHalfTypeInContext(gallivm->context), INFINITY) : lp_build_const_float(gallivm, INFINITY));
2234
store_val = LLVMBuildBitCast(builder, flt_max, int_bld->elem_type, "");
2238
LLVMValueRef flt_min = bit_size == 64 ? LLVMConstReal(LLVMDoubleTypeInContext(gallivm->context), -INFINITY) :
2239
(bit_size == 16 ? LLVMConstReal(LLVMHalfTypeInContext(gallivm->context), -INFINITY) : lp_build_const_float(gallivm, -INFINITY));
2240
store_val = LLVMBuildBitCast(builder, flt_min, int_bld->elem_type, "");
2244
LLVMValueRef flt_one = bit_size == 64 ? LLVMConstReal(LLVMDoubleTypeInContext(gallivm->context), 1.0) :
2245
(bit_size == 16 ? LLVMConstReal(LLVMHalfTypeInContext(gallivm->context), 1.0) : lp_build_const_float(gallivm, 1.0));
2246
store_val = LLVMBuildBitCast(builder, flt_one, int_bld->elem_type, "");
2252
store_val = LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), UINT8_MAX, 0);
2255
store_val = LLVMConstInt(LLVMInt16TypeInContext(gallivm->context), UINT16_MAX, 0);
2259
store_val = lp_build_const_int32(gallivm, UINT_MAX);
2262
store_val = lp_build_const_int64(gallivm, UINT64_MAX);
2269
store_val = LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), INT8_MAX, 0);
2272
store_val = LLVMConstInt(LLVMInt16TypeInContext(gallivm->context), INT16_MAX, 0);
2276
store_val = lp_build_const_int32(gallivm, INT_MAX);
2279
store_val = lp_build_const_int64(gallivm, INT64_MAX);
2286
store_val = LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), INT8_MIN, 0);
2289
store_val = LLVMConstInt(LLVMInt16TypeInContext(gallivm->context), INT16_MIN, 0);
2293
store_val = lp_build_const_int32(gallivm, INT_MIN);
2296
store_val = lp_build_const_int64(gallivm, INT64_MIN);
2303
store_val = LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), 1, 0);
2306
store_val = LLVMConstInt(LLVMInt16TypeInContext(gallivm->context), 1, 0);
2310
store_val = lp_build_const_int32(gallivm, 1);
2313
store_val = lp_build_const_int64(gallivm, 1);
2320
store_val = LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), 0xff, 0);
2323
store_val = LLVMConstInt(LLVMInt16TypeInContext(gallivm->context), 0xffff, 0);
2327
store_val = lp_build_const_int32(gallivm, 0xffffffff);
2330
store_val = lp_build_const_int64(gallivm, 0xffffffffffffffffLL);
2338
LLVMBuildStore(builder, store_val, scan_store);
2340
LLVMValueRef outer_cond = LLVMBuildICmp(builder, LLVMIntNE, exec_mask, bld_base->uint_bld.zero, "");
2342
lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
2344
struct lp_build_if_state ifthen;
2345
LLVMValueRef if_cond = LLVMBuildExtractElement(gallivm->builder, outer_cond, loop_state.counter, "");
2346
lp_build_if(&ifthen, gallivm, if_cond);
2347
LLVMValueRef value = LLVMBuildExtractElement(gallivm->builder, src, loop_state.counter, "");
2349
LLVMValueRef res = NULL;
2350
LLVMValueRef scan_val = LLVMBuildLoad(gallivm->builder, scan_store, "");
2351
if (instr->intrinsic != nir_intrinsic_reduce)
2352
res = LLVMBuildLoad(gallivm->builder, res_store, "");
2354
if (instr->intrinsic == nir_intrinsic_exclusive_scan)
2355
res = LLVMBuildInsertElement(builder, res, scan_val, loop_state.counter, "");
2358
scan_val = LLVMBuildBitCast(builder, scan_val, elem_bld.elem_type, "");
2359
value = LLVMBuildBitCast(builder, value, elem_bld.elem_type, "");
2361
switch (reduction_op) {
2364
scan_val = lp_build_add(&elem_bld, value, scan_val);
2368
scan_val = lp_build_mul(&elem_bld, value, scan_val);
2373
scan_val = lp_build_min(&elem_bld, value, scan_val);
2378
scan_val = lp_build_max(&elem_bld, value, scan_val);
2381
scan_val = lp_build_and(&elem_bld, value, scan_val);
2384
scan_val = lp_build_or(&elem_bld, value, scan_val);
2387
scan_val = lp_build_xor(&elem_bld, value, scan_val);
2394
scan_val = LLVMBuildBitCast(builder, scan_val, int_bld->elem_type, "");
2395
LLVMBuildStore(builder, scan_val, scan_store);
2397
if (instr->intrinsic == nir_intrinsic_inclusive_scan) {
2398
res = LLVMBuildInsertElement(builder, res, scan_val, loop_state.counter, "");
2401
if (instr->intrinsic != nir_intrinsic_reduce)
2402
LLVMBuildStore(builder, res, res_store);
2403
lp_build_endif(&ifthen);
2405
lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, bld_base->uint_bld.type.length),
2407
if (instr->intrinsic == nir_intrinsic_reduce)
2408
result[0] = lp_build_broadcast_scalar(int_bld, LLVMBuildLoad(builder, scan_store, ""));
2410
result[0] = LLVMBuildLoad(builder, res_store, "");
2413
static void emit_read_invocation(struct lp_build_nir_context *bld_base,
2417
LLVMValueRef result[4])
2419
struct gallivm_state *gallivm = bld_base->base.gallivm;
2420
LLVMBuilderRef builder = gallivm->builder;
2422
struct lp_build_context *uint_bld = get_int_bld(bld_base, true, bit_size);
2424
/* have to find the first active invocation */
2425
LLVMValueRef exec_mask = mask_vec(bld_base);
2426
struct lp_build_loop_state loop_state;
2427
LLVMValueRef res_store = lp_build_alloca(gallivm, bld_base->int_bld.elem_type, "");
2428
LLVMValueRef outer_cond = LLVMBuildICmp(builder, LLVMIntNE, exec_mask, bld_base->uint_bld.zero, "");
2429
lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, bld_base->uint_bld.type.length));
2431
LLVMValueRef if_cond = LLVMBuildExtractElement(gallivm->builder, outer_cond, loop_state.counter, "");
2432
struct lp_build_if_state ifthen;
2434
lp_build_if(&ifthen, gallivm, if_cond);
2435
LLVMValueRef store_val = loop_state.counter;
2437
store_val = LLVMBuildExtractElement(gallivm->builder, invoc, loop_state.counter, "");
2438
LLVMBuildStore(builder, store_val, res_store);
2439
lp_build_endif(&ifthen);
2441
lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, -1),
2442
lp_build_const_int32(gallivm, -1), LLVMIntEQ);
2443
idx = LLVMBuildLoad(builder, res_store, "");
2445
LLVMValueRef value = LLVMBuildExtractElement(gallivm->builder,
2447
result[0] = lp_build_broadcast_scalar(uint_bld, value);
2451
emit_interp_at(struct lp_build_nir_context *bld_base,
2452
unsigned num_components,
2456
unsigned const_index,
2457
LLVMValueRef indir_index,
2458
LLVMValueRef offsets[2],
2459
LLVMValueRef dst[4])
2461
struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
2463
for (unsigned i = 0; i < num_components; i++) {
2464
dst[i] = bld->fs_iface->interp_fn(bld->fs_iface, &bld_base->base,
2465
const_index + var->data.driver_location, i + var->data.location_frac,
2466
centroid, sample, indir_index, offsets);
2470
static LLVMValueRef get_scratch_thread_offsets(struct gallivm_state *gallivm,
2471
struct lp_type type,
2472
unsigned scratch_size)
2474
LLVMTypeRef elem_type = lp_build_int_elem_type(gallivm, type);
2475
LLVMValueRef elems[LP_MAX_VECTOR_LENGTH];
2478
if (type.length == 1)
2479
return LLVMConstInt(elem_type, 0, 0);
2481
for (i = 0; i < type.length; ++i)
2482
elems[i] = LLVMConstInt(elem_type, scratch_size * i, 0);
2484
return LLVMConstVector(elems, type.length);
2488
emit_load_scratch(struct lp_build_nir_context *bld_base,
2489
unsigned nc, unsigned bit_size,
2490
LLVMValueRef offset,
2491
LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS])
2493
struct gallivm_state * gallivm = bld_base->base.gallivm;
2494
LLVMBuilderRef builder = gallivm->builder;
2495
struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
2496
struct lp_build_context *uint_bld = &bld_base->uint_bld;
2497
struct lp_build_context *load_bld;
2498
LLVMValueRef thread_offsets = get_scratch_thread_offsets(gallivm, uint_bld->type, bld->scratch_size);;
2499
uint32_t shift_val = bit_size_to_shift_size(bit_size);
2500
LLVMValueRef exec_mask = mask_vec(bld_base);
2502
load_bld = get_int_bld(bld_base, true, bit_size);
2504
offset = lp_build_add(uint_bld, offset, thread_offsets);
2505
offset = lp_build_shr_imm(uint_bld, offset, shift_val);
2506
for (unsigned c = 0; c < nc; c++) {
2507
LLVMValueRef loop_index = lp_build_add(uint_bld, offset, lp_build_const_int_vec(gallivm, uint_bld->type, c));
2509
LLVMValueRef result = lp_build_alloca(gallivm, load_bld->vec_type, "");
2510
struct lp_build_loop_state loop_state;
2511
lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
2513
struct lp_build_if_state ifthen;
2514
LLVMValueRef cond, temp_res;
2516
loop_index = LLVMBuildExtractElement(gallivm->builder, loop_index,
2517
loop_state.counter, "");
2518
cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
2519
cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
2521
lp_build_if(&ifthen, gallivm, cond);
2522
LLVMValueRef scalar;
2523
LLVMValueRef ptr2 = LLVMBuildBitCast(builder, bld->scratch_ptr, LLVMPointerType(load_bld->elem_type, 0), "");
2524
scalar = lp_build_pointer_get(builder, ptr2, loop_index);
2526
temp_res = LLVMBuildLoad(builder, result, "");
2527
temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, loop_state.counter, "");
2528
LLVMBuildStore(builder, temp_res, result);
2529
lp_build_else(&ifthen);
2530
temp_res = LLVMBuildLoad(builder, result, "");
2531
LLVMValueRef zero = lp_build_zero_bits(gallivm, bit_size);
2532
temp_res = LLVMBuildInsertElement(builder, temp_res, zero, loop_state.counter, "");
2533
LLVMBuildStore(builder, temp_res, result);
2534
lp_build_endif(&ifthen);
2535
lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
2537
outval[c] = LLVMBuildLoad(gallivm->builder, result, "");
2542
/*
 * Store the write-masked components of dst into shader scratch memory,
 * lane by lane, skipping inactive lanes (mirror of emit_load_scratch).
 * Fix: removed a stray empty statement (`;;`) after the thread_offsets
 * initializer.
 */
static void
emit_store_scratch(struct lp_build_nir_context *bld_base,
                   unsigned writemask, unsigned nc,
                   unsigned bit_size, LLVMValueRef offset,
                   LLVMValueRef dst)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   struct lp_build_context *store_bld;
   LLVMValueRef thread_offsets = get_scratch_thread_offsets(gallivm, uint_bld->type, bld->scratch_size);
   uint32_t shift_val = bit_size_to_shift_size(bit_size);
   store_bld = get_int_bld(bld_base, true, bit_size);

   LLVMValueRef exec_mask = mask_vec(bld_base);
   /* byte offset -> element index within each lane's scratch slice */
   offset = lp_build_add(uint_bld, offset, thread_offsets);
   offset = lp_build_shr_imm(uint_bld, offset, shift_val);

   for (unsigned c = 0; c < nc; c++) {
      if (!(writemask & (1u << c)))
         continue;
      LLVMValueRef val = (nc == 1) ? dst : LLVMBuildExtractValue(builder, dst, c, "");
      LLVMValueRef loop_index = lp_build_add(uint_bld, offset, lp_build_const_int_vec(gallivm, uint_bld->type, c));

      struct lp_build_loop_state loop_state;
      lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));

      LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, val,
                                                       loop_state.counter, "");
      value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, store_bld->elem_type, "");

      struct lp_build_if_state ifthen;
      LLVMValueRef cond;

      loop_index = LLVMBuildExtractElement(gallivm->builder, loop_index,
                                           loop_state.counter, "");

      cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
      cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
      lp_build_if(&ifthen, gallivm, cond);

      LLVMValueRef ptr2 = LLVMBuildBitCast(builder, bld->scratch_ptr, LLVMPointerType(store_bld->elem_type, 0), "");
      lp_build_pointer_set(builder, ptr2, loop_index, value_ptr);

      lp_build_endif(&ifthen);
      lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
                             NULL, LLVMIntUGE);
   }
}
void lp_build_nir_soa(struct gallivm_state *gallivm,
2593
struct nir_shader *shader,
2594
const struct lp_build_tgsi_params *params,
2595
LLVMValueRef (*outputs)[4])
2597
struct lp_build_nir_soa_context bld;
2598
struct lp_type type = params->type;
2599
struct lp_type res_type;
2601
assert(type.length <= LP_MAX_VECTOR_LENGTH);
2602
memset(&res_type, 0, sizeof res_type);
2603
res_type.width = type.width;
2604
res_type.length = type.length;
2607
/* Setup build context */
2608
memset(&bld, 0, sizeof bld);
2609
lp_build_context_init(&bld.bld_base.base, gallivm, type);
2610
lp_build_context_init(&bld.bld_base.uint_bld, gallivm, lp_uint_type(type));
2611
lp_build_context_init(&bld.bld_base.int_bld, gallivm, lp_int_type(type));
2612
lp_build_context_init(&bld.elem_bld, gallivm, lp_elem_type(type));
2613
lp_build_context_init(&bld.uint_elem_bld, gallivm, lp_elem_type(lp_uint_type(type)));
2615
struct lp_type dbl_type;
2617
dbl_type.width *= 2;
2618
lp_build_context_init(&bld.bld_base.dbl_bld, gallivm, dbl_type);
2621
struct lp_type half_type;
2623
half_type.width /= 2;
2624
lp_build_context_init(&bld.bld_base.half_bld, gallivm, half_type);
2627
struct lp_type uint64_type;
2628
uint64_type = lp_uint_type(type);
2629
uint64_type.width *= 2;
2630
lp_build_context_init(&bld.bld_base.uint64_bld, gallivm, uint64_type);
2633
struct lp_type int64_type;
2634
int64_type = lp_int_type(type);
2635
int64_type.width *= 2;
2636
lp_build_context_init(&bld.bld_base.int64_bld, gallivm, int64_type);
2639
struct lp_type uint16_type;
2640
uint16_type = lp_uint_type(type);
2641
uint16_type.width /= 2;
2642
lp_build_context_init(&bld.bld_base.uint16_bld, gallivm, uint16_type);
2645
struct lp_type int16_type;
2646
int16_type = lp_int_type(type);
2647
int16_type.width /= 2;
2648
lp_build_context_init(&bld.bld_base.int16_bld, gallivm, int16_type);
2651
struct lp_type uint8_type;
2652
uint8_type = lp_uint_type(type);
2653
uint8_type.width /= 4;
2654
lp_build_context_init(&bld.bld_base.uint8_bld, gallivm, uint8_type);
2657
struct lp_type int8_type;
2658
int8_type = lp_int_type(type);
2659
int8_type.width /= 4;
2660
lp_build_context_init(&bld.bld_base.int8_bld, gallivm, int8_type);
2662
bld.bld_base.load_var = emit_load_var;
2663
bld.bld_base.store_var = emit_store_var;
2664
bld.bld_base.load_reg = emit_load_reg;
2665
bld.bld_base.store_reg = emit_store_reg;
2666
bld.bld_base.emit_var_decl = emit_var_decl;
2667
bld.bld_base.load_ubo = emit_load_ubo;
2668
bld.bld_base.load_kernel_arg = emit_load_kernel_arg;
2669
bld.bld_base.load_global = emit_load_global;
2670
bld.bld_base.store_global = emit_store_global;
2671
bld.bld_base.atomic_global = emit_atomic_global;
2672
bld.bld_base.tex = emit_tex;
2673
bld.bld_base.tex_size = emit_tex_size;
2674
bld.bld_base.bgnloop = bgnloop;
2675
bld.bld_base.endloop = endloop;
2676
bld.bld_base.if_cond = if_cond;
2677
bld.bld_base.else_stmt = else_stmt;
2678
bld.bld_base.endif_stmt = endif_stmt;
2679
bld.bld_base.break_stmt = break_stmt;
2680
bld.bld_base.continue_stmt = continue_stmt;
2681
bld.bld_base.sysval_intrin = emit_sysval_intrin;
2682
bld.bld_base.discard = discard;
2683
bld.bld_base.emit_vertex = emit_vertex;
2684
bld.bld_base.end_primitive = end_primitive;
2685
bld.bld_base.load_mem = emit_load_mem;
2686
bld.bld_base.store_mem = emit_store_mem;
2687
bld.bld_base.get_ssbo_size = emit_get_ssbo_size;
2688
bld.bld_base.atomic_mem = emit_atomic_mem;
2689
bld.bld_base.barrier = emit_barrier;
2690
bld.bld_base.image_op = emit_image_op;
2691
bld.bld_base.image_size = emit_image_size;
2692
bld.bld_base.vote = emit_vote;
2693
bld.bld_base.elect = emit_elect;
2694
bld.bld_base.reduce = emit_reduce;
2695
bld.bld_base.ballot = emit_ballot;
2696
#if LLVM_VERSION_MAJOR >= 10
2697
bld.bld_base.shuffle = emit_shuffle;
2699
bld.bld_base.read_invocation = emit_read_invocation;
2700
bld.bld_base.helper_invocation = emit_helper_invocation;
2701
bld.bld_base.interp_at = emit_interp_at;
2702
bld.bld_base.load_scratch = emit_load_scratch;
2703
bld.bld_base.store_scratch = emit_store_scratch;
2704
bld.bld_base.load_const = emit_load_const;
2706
bld.mask = params->mask;
2707
bld.inputs = params->inputs;
2708
bld.outputs = outputs;
2709
bld.consts_ptr = params->consts_ptr;
2710
bld.const_sizes_ptr = params->const_sizes_ptr;
2711
bld.ssbo_ptr = params->ssbo_ptr;
2712
bld.ssbo_sizes_ptr = params->ssbo_sizes_ptr;
2713
bld.sampler = params->sampler;
2714
// bld.bld_base.info = params->info;
2716
bld.context_ptr = params->context_ptr;
2717
bld.thread_data_ptr = params->thread_data_ptr;
2718
bld.bld_base.aniso_filter_table = params->aniso_filter_table;
2719
bld.image = params->image;
2720
bld.shared_ptr = params->shared_ptr;
2721
bld.coro = params->coro;
2722
bld.kernel_args_ptr = params->kernel_args;
2724
if (params->info->indirect_files & (1 << TGSI_FILE_INPUT))
2725
bld.indirects |= nir_var_shader_in;
2727
bld.gs_iface = params->gs_iface;
2728
bld.tcs_iface = params->tcs_iface;
2729
bld.tes_iface = params->tes_iface;
2730
bld.fs_iface = params->fs_iface;
2732
struct lp_build_context *uint_bld = &bld.bld_base.uint_bld;
2734
bld.gs_vertex_streams = params->gs_vertex_streams;
2735
bld.max_output_vertices_vec = lp_build_const_int_vec(gallivm, bld.bld_base.int_bld.type,
2736
shader->info.gs.vertices_out);
2737
for (int i = 0; i < params->gs_vertex_streams; i++) {
2738
bld.emitted_prims_vec_ptr[i] =
2739
lp_build_alloca(gallivm, uint_bld->vec_type, "emitted_prims_ptr");
2740
bld.emitted_vertices_vec_ptr[i] =
2741
lp_build_alloca(gallivm, uint_bld->vec_type, "emitted_vertices_ptr");
2742
bld.total_emitted_vertices_vec_ptr[i] =
2743
lp_build_alloca(gallivm, uint_bld->vec_type, "total_emitted_vertices_ptr");
2746
lp_exec_mask_init(&bld.exec_mask, &bld.bld_base.int_bld);
2748
bld.system_values = *params->system_values;
2750
bld.bld_base.shader = shader;
2752
if (shader->scratch_size) {
2753
bld.scratch_ptr = lp_build_array_alloca(gallivm,
2754
LLVMInt8TypeInContext(gallivm->context),
2755
lp_build_const_int32(gallivm, shader->scratch_size * type.length),
2758
bld.scratch_size = shader->scratch_size;
2759
emit_prologue(&bld);
2760
lp_build_nir_llvm(&bld.bld_base, shader);
2763
LLVMBuilderRef builder = bld.bld_base.base.gallivm->builder;
2764
LLVMValueRef total_emitted_vertices_vec;
2765
LLVMValueRef emitted_prims_vec;
2767
for (int i = 0; i < params->gs_vertex_streams; i++) {
2768
end_primitive_masked(&bld.bld_base, lp_build_mask_value(bld.mask), i);
2770
total_emitted_vertices_vec =
2771
LLVMBuildLoad(builder, bld.total_emitted_vertices_vec_ptr[i], "");
2774
LLVMBuildLoad(builder, bld.emitted_prims_vec_ptr[i], "");
2775
bld.gs_iface->gs_epilogue(bld.gs_iface,
2776
total_emitted_vertices_vec,
2777
emitted_prims_vec, i);
2780
lp_exec_mask_fini(&bld.exec_mask);