~mmach/netext73/mesa-haswell

« back to all changes in this revision

Viewing changes to src/gallium/auxiliary/gallivm/lp_bld_nir_soa.c

  • Committer: mmach
  • Date: 2022-09-22 19:56:13 UTC
  • Revision ID: netbit73@gmail.com-20220922195613-wtik9mmy20tmor0i
2022-09-22 21:17:09

Show diffs side-by-side

added added

removed removed

Lines of Context:
1
 
/**************************************************************************
2
 
 *
3
 
 * Copyright 2019 Red Hat.
4
 
 * All Rights Reserved.
5
 
 *
6
 
 * Permission is hereby granted, free of charge, to any person obtaining a
7
 
 * copy of this software and associated documentation files (the "Software"),
8
 
 * to deal in the Software without restriction, including without limitation
9
 
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10
 
 * and/or sell copies of the Software, and to permit persons to whom the
11
 
 * Software is furnished to do so, subject to the following conditions:
12
 
 *
13
 
 * The above copyright notice and this permission notice shall be included
14
 
 * in all copies or substantial portions of the Software.
15
 
 *
16
 
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17
 
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
 
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19
 
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
 
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
 
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22
 
 * SOFTWARE.
23
 
 *
24
 
 **************************************************************************/
25
 
 
26
 
#include "lp_bld_nir.h"
27
 
#include "lp_bld_init.h"
28
 
#include "lp_bld_flow.h"
29
 
#include "lp_bld_logic.h"
30
 
#include "lp_bld_gather.h"
31
 
#include "lp_bld_const.h"
32
 
#include "lp_bld_struct.h"
33
 
#include "lp_bld_arit.h"
34
 
#include "lp_bld_bitarit.h"
35
 
#include "lp_bld_coro.h"
36
 
#include "lp_bld_printf.h"
37
 
#include "lp_bld_intr.h"
38
 
#include "util/u_cpu_detect.h"
39
 
#include "util/u_math.h"
40
 
 
41
 
static int bit_size_to_shift_size(int bit_size)
42
 
{
43
 
   switch (bit_size) {
44
 
   case 64:
45
 
      return 3;
46
 
   default:
47
 
   case 32:
48
 
      return 2;
49
 
   case 16:
50
 
      return 1;
51
 
   case 8:
52
 
      return 0;
53
 
   }
54
 
}
55
 
 
56
 
/*
57
 
 * combine the execution mask if there is one with the current mask.
58
 
 */
59
 
static LLVMValueRef
60
 
mask_vec(struct lp_build_nir_context *bld_base)
61
 
{
62
 
   struct lp_build_nir_soa_context * bld = (struct lp_build_nir_soa_context *)bld_base;
63
 
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
64
 
   struct lp_exec_mask *exec_mask = &bld->exec_mask;
65
 
   LLVMValueRef bld_mask = bld->mask ? lp_build_mask_value(bld->mask) : NULL;
66
 
   if (!exec_mask->has_mask) {
67
 
      return bld_mask;
68
 
   }
69
 
   if (!bld_mask)
70
 
      return exec_mask->exec_mask;
71
 
   return LLVMBuildAnd(builder, lp_build_mask_value(bld->mask),
72
 
                       exec_mask->exec_mask, "");
73
 
}
74
 
 
75
 
static bool
76
 
invocation_0_must_be_active(struct lp_build_nir_context *bld_base)
77
 
{
78
 
   struct lp_build_nir_soa_context * bld = (struct lp_build_nir_soa_context *)bld_base;
79
 
 
80
 
   /* Fragment shaders may dispatch with invocation 0 inactive.  All other
81
 
    * stages have invocation 0 active at the top.  (See
82
 
    * lp_build_tgsi_params.mask setup in draw_llvm.c and lp_state_*.c)
83
 
    */
84
 
   if (bld_base->shader->info.stage == MESA_SHADER_FRAGMENT)
85
 
      return false;
86
 
 
87
 
   /* If we're in some control flow right now, then invocation 0 may be
88
 
    * disabled.
89
 
    */
90
 
   if (bld->exec_mask.has_mask)
91
 
      return false;
92
 
 
93
 
   return true;
94
 
}
95
 
 
96
 
static LLVMValueRef
97
 
lp_build_zero_bits(struct gallivm_state *gallivm, int bit_size)
98
 
{
99
 
   if (bit_size == 64)
100
 
      return LLVMConstInt(LLVMInt64TypeInContext(gallivm->context), 0, 0);
101
 
   else if (bit_size == 16)
102
 
      return LLVMConstInt(LLVMInt16TypeInContext(gallivm->context), 0, 0);
103
 
   else if (bit_size == 8)
104
 
      return LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), 0, 0);
105
 
   else
106
 
      return lp_build_const_int32(gallivm, 0);
107
 
}
108
 
 
109
 
static LLVMValueRef
110
 
emit_fetch_64bit(
111
 
   struct lp_build_nir_context * bld_base,
112
 
   LLVMValueRef input,
113
 
   LLVMValueRef input2)
114
 
{
115
 
   struct gallivm_state *gallivm = bld_base->base.gallivm;
116
 
   LLVMBuilderRef builder = gallivm->builder;
117
 
   LLVMValueRef res;
118
 
   int i;
119
 
   LLVMValueRef shuffles[2 * (LP_MAX_VECTOR_WIDTH/32)];
120
 
   int len = bld_base->base.type.length * 2;
121
 
   assert(len <= (2 * (LP_MAX_VECTOR_WIDTH/32)));
122
 
 
123
 
   for (i = 0; i < bld_base->base.type.length * 2; i+=2) {
124
 
#if UTIL_ARCH_LITTLE_ENDIAN
125
 
      shuffles[i] = lp_build_const_int32(gallivm, i / 2);
126
 
      shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
127
 
#else
128
 
      shuffles[i] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
129
 
      shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2);
130
 
#endif
131
 
   }
132
 
   res = LLVMBuildShuffleVector(builder, input, input2, LLVMConstVector(shuffles, len), "");
133
 
 
134
 
   return LLVMBuildBitCast(builder, res, bld_base->dbl_bld.vec_type, "");
135
 
}
136
 
 
137
 
static void
138
 
emit_store_64bit_split(struct lp_build_nir_context *bld_base,
139
 
                       LLVMValueRef value,
140
 
                       LLVMValueRef split_values[2])
141
 
{
142
 
   struct gallivm_state *gallivm = bld_base->base.gallivm;
143
 
   LLVMBuilderRef builder = gallivm->builder;
144
 
   unsigned i;
145
 
   LLVMValueRef shuffles[LP_MAX_VECTOR_WIDTH/32];
146
 
   LLVMValueRef shuffles2[LP_MAX_VECTOR_WIDTH/32];
147
 
   int len = bld_base->base.type.length * 2;
148
 
 
149
 
   value = LLVMBuildBitCast(gallivm->builder, value, LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), len), "");
150
 
   for (i = 0; i < bld_base->base.type.length; i++) {
151
 
#if UTIL_ARCH_LITTLE_ENDIAN
152
 
      shuffles[i] = lp_build_const_int32(gallivm, i * 2);
153
 
      shuffles2[i] = lp_build_const_int32(gallivm, (i * 2) + 1);
154
 
#else
155
 
      shuffles[i] = lp_build_const_int32(gallivm, i * 2 + 1);
156
 
      shuffles2[i] = lp_build_const_int32(gallivm, i * 2);
157
 
#endif
158
 
   }
159
 
 
160
 
   split_values[0] = LLVMBuildShuffleVector(builder, value,
161
 
                                 LLVMGetUndef(LLVMTypeOf(value)),
162
 
                                 LLVMConstVector(shuffles,
163
 
                                                 bld_base->base.type.length),
164
 
                                 "");
165
 
   split_values[1] = LLVMBuildShuffleVector(builder, value,
166
 
                                  LLVMGetUndef(LLVMTypeOf(value)),
167
 
                                  LLVMConstVector(shuffles2,
168
 
                                                  bld_base->base.type.length),
169
 
                                  "");
170
 
}
171
 
 
172
 
static void
173
 
emit_store_64bit_chan(struct lp_build_nir_context *bld_base,
174
 
                      LLVMValueRef chan_ptr,
175
 
                      LLVMValueRef chan_ptr2,
176
 
                      LLVMValueRef value)
177
 
{
178
 
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
179
 
   struct lp_build_context *float_bld = &bld_base->base;
180
 
   LLVMValueRef split_vals[2];
181
 
 
182
 
   emit_store_64bit_split(bld_base, value, split_vals);
183
 
 
184
 
   lp_exec_mask_store(&bld->exec_mask, float_bld, split_vals[0], chan_ptr);
185
 
   lp_exec_mask_store(&bld->exec_mask, float_bld, split_vals[1], chan_ptr2);
186
 
}
187
 
 
188
 
static LLVMValueRef
189
 
get_soa_array_offsets(struct lp_build_context *uint_bld,
190
 
                      LLVMValueRef indirect_index,
191
 
                      int num_components,
192
 
                      unsigned chan_index,
193
 
                      bool need_perelement_offset)
194
 
{
195
 
   struct gallivm_state *gallivm = uint_bld->gallivm;
196
 
   LLVMValueRef chan_vec =
197
 
      lp_build_const_int_vec(uint_bld->gallivm, uint_bld->type, chan_index);
198
 
   LLVMValueRef length_vec =
199
 
      lp_build_const_int_vec(gallivm, uint_bld->type, uint_bld->type.length);
200
 
   LLVMValueRef index_vec;
201
 
 
202
 
   /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */
203
 
   index_vec = lp_build_mul(uint_bld, indirect_index, lp_build_const_int_vec(uint_bld->gallivm, uint_bld->type, num_components));
204
 
   index_vec = lp_build_add(uint_bld, index_vec, chan_vec);
205
 
   index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
206
 
 
207
 
   if (need_perelement_offset) {
208
 
      LLVMValueRef pixel_offsets;
209
 
      unsigned i;
210
 
     /* build pixel offset vector: {0, 1, 2, 3, ...} */
211
 
      pixel_offsets = uint_bld->undef;
212
 
      for (i = 0; i < uint_bld->type.length; i++) {
213
 
         LLVMValueRef ii = lp_build_const_int32(gallivm, i);
214
 
         pixel_offsets = LLVMBuildInsertElement(gallivm->builder, pixel_offsets,
215
 
                                                ii, ii, "");
216
 
      }
217
 
      index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);
218
 
   }
219
 
   return index_vec;
220
 
}
221
 
 
222
 
static LLVMValueRef
223
 
build_gather(struct lp_build_nir_context *bld_base,
224
 
             struct lp_build_context *bld,
225
 
             LLVMValueRef base_ptr,
226
 
             LLVMValueRef indexes,
227
 
             LLVMValueRef overflow_mask,
228
 
             LLVMValueRef indexes2)
229
 
{
230
 
   struct gallivm_state *gallivm = bld_base->base.gallivm;
231
 
   LLVMBuilderRef builder = gallivm->builder;
232
 
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
233
 
   LLVMValueRef res;
234
 
   unsigned i;
235
 
 
236
 
   if (indexes2)
237
 
      res = LLVMGetUndef(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2));
238
 
   else
239
 
      res = bld->undef;
240
 
   /*
241
 
    * overflow_mask is a vector telling us which channels
242
 
    * in the vector overflowed. We use the overflow behavior for
243
 
    * constant buffers which is defined as:
244
 
    * Out of bounds access to constant buffer returns 0 in all
245
 
    * components. Out of bounds behavior is always with respect
246
 
    * to the size of the buffer bound at that slot.
247
 
    */
248
 
 
249
 
   if (overflow_mask) {
250
 
      /*
251
 
       * We avoid per-element control flow here (also due to llvm going crazy,
252
 
       * though I suspect it's better anyway since overflow is likely rare).
253
 
       * Note that since we still fetch from buffers even if num_elements was
254
 
       * zero (in this case we'll fetch from index zero) the jit func callers
255
 
       * MUST provide valid fake constant buffers of size 4x32 (the values do
256
 
       * not matter), otherwise we'd still need (not per element though)
257
 
       * control flow.
258
 
       */
259
 
      indexes = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes);
260
 
      if (indexes2)
261
 
         indexes2 = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes2);
262
 
   }
263
 
 
264
 
   /*
265
 
    * Loop over elements of index_vec, load scalar value, insert it into 'res'.
266
 
    */
267
 
   for (i = 0; i < bld->type.length * (indexes2 ? 2 : 1); i++) {
268
 
      LLVMValueRef si, di;
269
 
      LLVMValueRef index;
270
 
      LLVMValueRef scalar_ptr, scalar;
271
 
 
272
 
      di = lp_build_const_int32(gallivm, i);
273
 
      if (indexes2)
274
 
         si = lp_build_const_int32(gallivm, i >> 1);
275
 
      else
276
 
         si = di;
277
 
 
278
 
      if (indexes2 && (i & 1)) {
279
 
         index = LLVMBuildExtractElement(builder,
280
 
                                         indexes2, si, "");
281
 
      } else {
282
 
         index = LLVMBuildExtractElement(builder,
283
 
                                         indexes, si, "");
284
 
      }
285
 
      scalar_ptr = LLVMBuildGEP(builder, base_ptr,
286
 
                                &index, 1, "gather_ptr");
287
 
      scalar = LLVMBuildLoad(builder, scalar_ptr, "");
288
 
 
289
 
      res = LLVMBuildInsertElement(builder, res, scalar, di, "");
290
 
   }
291
 
 
292
 
   if (overflow_mask) {
293
 
      if (indexes2) {
294
 
         res = LLVMBuildBitCast(builder, res, bld_base->dbl_bld.vec_type, "");
295
 
         overflow_mask = LLVMBuildSExt(builder, overflow_mask,
296
 
                                       bld_base->dbl_bld.int_vec_type, "");
297
 
         res = lp_build_select(&bld_base->dbl_bld, overflow_mask,
298
 
                               bld_base->dbl_bld.zero, res);
299
 
      } else
300
 
         res = lp_build_select(bld, overflow_mask, bld->zero, res);
301
 
   }
302
 
 
303
 
   return res;
304
 
}
305
 
 
306
 
/**
307
 
 * Scatter/store vector.
308
 
 */
309
 
static void
310
 
emit_mask_scatter(struct lp_build_nir_soa_context *bld,
311
 
                  LLVMValueRef base_ptr,
312
 
                  LLVMValueRef indexes,
313
 
                  LLVMValueRef values,
314
 
                  struct lp_exec_mask *mask)
315
 
{
316
 
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
317
 
   LLVMBuilderRef builder = gallivm->builder;
318
 
   unsigned i;
319
 
   LLVMValueRef pred = mask->has_mask ? mask->exec_mask : NULL;
320
 
 
321
 
   /*
322
 
    * Loop over elements of index_vec, store scalar value.
323
 
    */
324
 
   for (i = 0; i < bld->bld_base.base.type.length; i++) {
325
 
      LLVMValueRef ii = lp_build_const_int32(gallivm, i);
326
 
      LLVMValueRef index = LLVMBuildExtractElement(builder, indexes, ii, "");
327
 
      LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr, &index, 1, "scatter_ptr");
328
 
      LLVMValueRef val = LLVMBuildExtractElement(builder, values, ii, "scatter_val");
329
 
      LLVMValueRef scalar_pred = pred ?
330
 
         LLVMBuildExtractElement(builder, pred, ii, "scatter_pred") : NULL;
331
 
 
332
 
      if (0)
333
 
         lp_build_printf(gallivm, "scatter %d: val %f at %d %p\n",
334
 
                         ii, val, index, scalar_ptr);
335
 
 
336
 
      if (scalar_pred) {
337
 
         LLVMValueRef real_val, dst_val;
338
 
         dst_val = LLVMBuildLoad(builder, scalar_ptr, "");
339
 
         scalar_pred = LLVMBuildTrunc(builder, scalar_pred, LLVMInt1TypeInContext(gallivm->context), "");
340
 
         real_val = LLVMBuildSelect(builder, scalar_pred, val, dst_val, "");
341
 
         LLVMBuildStore(builder, real_val, scalar_ptr);
342
 
      }
343
 
      else {
344
 
         LLVMBuildStore(builder, val, scalar_ptr);
345
 
      }
346
 
   }
347
 
}
348
 
 
349
 
static void emit_load_var(struct lp_build_nir_context *bld_base,
350
 
                           nir_variable_mode deref_mode,
351
 
                           unsigned num_components,
352
 
                           unsigned bit_size,
353
 
                           nir_variable *var,
354
 
                           unsigned vertex_index,
355
 
                           LLVMValueRef indir_vertex_index,
356
 
                           unsigned const_index,
357
 
                           LLVMValueRef indir_index,
358
 
                           LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
359
 
{
360
 
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
361
 
   struct gallivm_state *gallivm = bld_base->base.gallivm;
362
 
   int dmul = bit_size == 64 ? 2 : 1;
363
 
   unsigned location = var->data.driver_location;
364
 
   unsigned location_frac = var->data.location_frac;
365
 
 
366
 
   if (!var->data.compact && !indir_index)
367
 
      location += const_index;
368
 
   else if (var->data.compact) {
369
 
      location += const_index / 4;
370
 
      location_frac += const_index % 4;
371
 
      const_index = 0;
372
 
   }
373
 
   switch (deref_mode) {
374
 
   case nir_var_shader_in:
375
 
      for (unsigned i = 0; i < num_components; i++) {
376
 
         int idx = (i * dmul) + location_frac;
377
 
         int comp_loc = location;
378
 
 
379
 
         if (bit_size == 64 && idx >= 4) {
380
 
            comp_loc++;
381
 
            idx = idx % 4;
382
 
         }
383
 
 
384
 
         if (bld->gs_iface) {
385
 
            LLVMValueRef vertex_index_val = lp_build_const_int32(gallivm, vertex_index);
386
 
            LLVMValueRef attrib_index_val = lp_build_const_int32(gallivm, comp_loc);
387
 
            LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx);
388
 
            LLVMValueRef result2;
389
 
 
390
 
            result[i] = bld->gs_iface->fetch_input(bld->gs_iface, &bld_base->base,
391
 
                                                   false, vertex_index_val, 0, attrib_index_val, swizzle_index_val);
392
 
            if (bit_size == 64) {
393
 
               LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx + 1);
394
 
               result2 = bld->gs_iface->fetch_input(bld->gs_iface, &bld_base->base,
395
 
                                                    false, vertex_index_val, 0, attrib_index_val, swizzle_index_val);
396
 
               result[i] = emit_fetch_64bit(bld_base, result[i], result2);
397
 
            }
398
 
         } else if (bld->tes_iface) {
399
 
            LLVMValueRef vertex_index_val = lp_build_const_int32(gallivm, vertex_index);
400
 
            LLVMValueRef attrib_index_val;
401
 
            LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx);
402
 
            LLVMValueRef result2;
403
 
 
404
 
            if (indir_index) {
405
 
               if (var->data.compact) {
406
 
                  swizzle_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, idx));
407
 
                  attrib_index_val = lp_build_const_int32(gallivm, comp_loc);
408
 
               } else
409
 
                  attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, comp_loc));
410
 
            } else
411
 
               attrib_index_val = lp_build_const_int32(gallivm, comp_loc);
412
 
 
413
 
            if (var->data.patch) {
414
 
               result[i] = bld->tes_iface->fetch_patch_input(bld->tes_iface, &bld_base->base,
415
 
                                                             indir_index ? true : false, attrib_index_val, swizzle_index_val);
416
 
               if (bit_size == 64) {
417
 
                  LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx + 1);
418
 
                  result2 = bld->tes_iface->fetch_patch_input(bld->tes_iface, &bld_base->base,
419
 
                                                              indir_index ? true : false, attrib_index_val, swizzle_index_val);
420
 
                  result[i] = emit_fetch_64bit(bld_base, result[i], result2);
421
 
               }
422
 
            }
423
 
            else {
424
 
               result[i] = bld->tes_iface->fetch_vertex_input(bld->tes_iface, &bld_base->base,
425
 
                                                              indir_vertex_index ? true : false,
426
 
                                                              indir_vertex_index ? indir_vertex_index : vertex_index_val,
427
 
                                                              (indir_index && !var->data.compact) ? true : false, attrib_index_val,
428
 
                                                              (indir_index && var->data.compact) ? true : false, swizzle_index_val);
429
 
               if (bit_size == 64) {
430
 
                  LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx + 1);
431
 
                  result2 = bld->tes_iface->fetch_vertex_input(bld->tes_iface, &bld_base->base,
432
 
                                                               indir_vertex_index ? true : false,
433
 
                                                               indir_vertex_index ? indir_vertex_index : vertex_index_val,
434
 
                                                               indir_index ? true : false, attrib_index_val, false, swizzle_index_val);
435
 
                  result[i] = emit_fetch_64bit(bld_base, result[i], result2);
436
 
               }
437
 
            }
438
 
         } else if (bld->tcs_iface) {
439
 
            LLVMValueRef vertex_index_val = lp_build_const_int32(gallivm, vertex_index);
440
 
            LLVMValueRef attrib_index_val;
441
 
            LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx);
442
 
 
443
 
            if (indir_index) {
444
 
               if (var->data.compact) {
445
 
                  swizzle_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, idx));
446
 
                  attrib_index_val = lp_build_const_int32(gallivm, comp_loc);
447
 
               } else
448
 
                  attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, comp_loc));
449
 
            } else
450
 
               attrib_index_val = lp_build_const_int32(gallivm, comp_loc);
451
 
            result[i] = bld->tcs_iface->emit_fetch_input(bld->tcs_iface, &bld_base->base,
452
 
                                                         indir_vertex_index ? true : false, indir_vertex_index ? indir_vertex_index : vertex_index_val,
453
 
                                                         (indir_index && !var->data.compact) ? true : false, attrib_index_val,
454
 
                                                         (indir_index && var->data.compact) ? true : false, swizzle_index_val);
455
 
            if (bit_size == 64) {
456
 
               LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx + 1);
457
 
               LLVMValueRef result2 = bld->tcs_iface->emit_fetch_input(bld->tcs_iface, &bld_base->base,
458
 
                                                                       indir_vertex_index ? true : false, indir_vertex_index ? indir_vertex_index : vertex_index_val,
459
 
                                                                       indir_index ? true : false, attrib_index_val,
460
 
                                                                       false, swizzle_index_val);
461
 
               result[i] = emit_fetch_64bit(bld_base, result[i], result2);
462
 
            }
463
 
         } else {
464
 
            if (indir_index) {
465
 
               LLVMValueRef attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, comp_loc));
466
 
               LLVMValueRef index_vec = get_soa_array_offsets(&bld_base->uint_bld,
467
 
                                                              attrib_index_val, 4, idx,
468
 
                                                              TRUE);
469
 
               LLVMValueRef index_vec2 = NULL;
470
 
               LLVMTypeRef fptr_type;
471
 
               LLVMValueRef inputs_array;
472
 
               fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
473
 
               inputs_array = LLVMBuildBitCast(gallivm->builder, bld->inputs_array, fptr_type, "");
474
 
 
475
 
               if (bit_size == 64)
476
 
                  index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
477
 
                                                     indir_index, 4, idx + 1, TRUE);
478
 
 
479
 
               /* Gather values from the input register array */
480
 
               result[i] = build_gather(bld_base, &bld_base->base, inputs_array, index_vec, NULL, index_vec2);
481
 
            } else {
482
 
               if (bld->indirects & nir_var_shader_in) {
483
 
                  LLVMValueRef lindex = lp_build_const_int32(gallivm,
484
 
                                                             comp_loc * 4 + idx);
485
 
                  LLVMValueRef input_ptr = lp_build_pointer_get(gallivm->builder,
486
 
                                                             bld->inputs_array, lindex);
487
 
                  if (bit_size == 64) {
488
 
                     LLVMValueRef lindex2 = lp_build_const_int32(gallivm,
489
 
                                                                 comp_loc * 4 + (idx + 1));
490
 
                     LLVMValueRef input_ptr2 = lp_build_pointer_get(gallivm->builder,
491
 
                                                                    bld->inputs_array, lindex2);
492
 
                     result[i] = emit_fetch_64bit(bld_base, input_ptr, input_ptr2);
493
 
                  } else {
494
 
                     result[i] = input_ptr;
495
 
                  }
496
 
               } else {
497
 
                  if (bit_size == 64) {
498
 
                     LLVMValueRef tmp[2];
499
 
                     tmp[0] = bld->inputs[comp_loc][idx];
500
 
                     tmp[1] = bld->inputs[comp_loc][idx + 1];
501
 
                     result[i] = emit_fetch_64bit(bld_base, tmp[0], tmp[1]);
502
 
                  } else {
503
 
                     result[i] = bld->inputs[comp_loc][idx];
504
 
                  }
505
 
               }
506
 
            }
507
 
         }
508
 
      }
509
 
      break;
510
 
   case nir_var_shader_out:
511
 
      if (bld->fs_iface && bld->fs_iface->fb_fetch) {
512
 
         bld->fs_iface->fb_fetch(bld->fs_iface, &bld_base->base, var->data.location, result);
513
 
         return;
514
 
      }
515
 
      for (unsigned i = 0; i < num_components; i++) {
516
 
         int idx = (i * dmul) + location_frac;
517
 
         if (bld->tcs_iface) {
518
 
            LLVMValueRef vertex_index_val = lp_build_const_int32(gallivm, vertex_index);
519
 
            LLVMValueRef attrib_index_val;
520
 
            LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx);
521
 
 
522
 
            if (indir_index)
523
 
               attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, var->data.driver_location));
524
 
            else
525
 
               attrib_index_val = lp_build_const_int32(gallivm, location);
526
 
 
527
 
            result[i] = bld->tcs_iface->emit_fetch_output(bld->tcs_iface, &bld_base->base,
528
 
                                                          indir_vertex_index ? true : false, indir_vertex_index ? indir_vertex_index : vertex_index_val,
529
 
                                                          (indir_index && !var->data.compact) ? true : false, attrib_index_val,
530
 
                                                          (indir_index && var->data.compact) ? true : false, swizzle_index_val, 0);
531
 
            if (bit_size == 64) {
532
 
               LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx + 1);
533
 
               LLVMValueRef result2 = bld->tcs_iface->emit_fetch_output(bld->tcs_iface, &bld_base->base,
534
 
                                                                        indir_vertex_index ? true : false, indir_vertex_index ? indir_vertex_index : vertex_index_val,
535
 
                                                                        indir_index ? true : false, attrib_index_val,
536
 
                                                                        false, swizzle_index_val, 0);
537
 
               result[i] = emit_fetch_64bit(bld_base, result[i], result2);
538
 
            }
539
 
         }
540
 
      }
541
 
      break;
542
 
   default:
543
 
      break;
544
 
   }
545
 
}
546
 
 
547
 
static void emit_store_chan(struct lp_build_nir_context *bld_base,
548
 
                            nir_variable_mode deref_mode,
549
 
                            unsigned bit_size,
550
 
                            unsigned location, unsigned comp,
551
 
                            unsigned chan,
552
 
                            LLVMValueRef dst)
553
 
{
554
 
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
555
 
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
556
 
   struct lp_build_context *float_bld = &bld_base->base;
557
 
 
558
 
   if (bit_size == 64) {
559
 
      chan *= 2;
560
 
      chan += comp;
561
 
      if (chan >= 4) {
562
 
         chan -= 4;
563
 
         location++;
564
 
      }
565
 
      emit_store_64bit_chan(bld_base, bld->outputs[location][chan],
566
 
                            bld->outputs[location][chan + 1], dst);
567
 
   } else {
568
 
      dst = LLVMBuildBitCast(builder, dst, float_bld->vec_type, "");
569
 
      lp_exec_mask_store(&bld->exec_mask, float_bld, dst,
570
 
                         bld->outputs[location][chan + comp]);
571
 
   }
572
 
}
573
 
 
574
 
static void emit_store_tcs_chan(struct lp_build_nir_context *bld_base,
575
 
                                bool is_compact,
576
 
                                unsigned bit_size,
577
 
                                unsigned location,
578
 
                                unsigned const_index,
579
 
                                LLVMValueRef indir_vertex_index,
580
 
                                LLVMValueRef indir_index,
581
 
                                unsigned comp,
582
 
                                unsigned chan,
583
 
                                LLVMValueRef chan_val)
584
 
{
585
 
   struct gallivm_state *gallivm = bld_base->base.gallivm;
586
 
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
587
 
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
588
 
   unsigned swizzle = chan;
589
 
   if (bit_size == 64) {
590
 
      swizzle *= 2;
591
 
      swizzle += comp;
592
 
      if (swizzle >= 4) {
593
 
         swizzle -= 4;
594
 
         location++;
595
 
      }
596
 
   } else
597
 
      swizzle += comp;
598
 
   LLVMValueRef attrib_index_val;
599
 
   LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, swizzle);
600
 
 
601
 
   if (indir_index) {
602
 
      if (is_compact) {
603
 
         swizzle_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, swizzle));
604
 
         attrib_index_val = lp_build_const_int32(gallivm, const_index + location);
605
 
      } else
606
 
         attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, location));
607
 
   } else
608
 
      attrib_index_val = lp_build_const_int32(gallivm, const_index + location);
609
 
   LLVMValueRef exec_mask = mask_vec(bld_base);
610
 
   if (bit_size == 64) {
611
 
      LLVMValueRef split_vals[2];
612
 
      LLVMValueRef swizzle_index_val2 = lp_build_const_int32(gallivm, swizzle + 1);
613
 
      emit_store_64bit_split(bld_base, chan_val, split_vals);
614
 
      bld->tcs_iface->emit_store_output(bld->tcs_iface, &bld_base->base, 0,
615
 
                                        indir_vertex_index ? true : false,
616
 
                                        indir_vertex_index,
617
 
                                        indir_index ? true : false,
618
 
                                        attrib_index_val,
619
 
                                        false, swizzle_index_val,
620
 
                                        split_vals[0], exec_mask);
621
 
      bld->tcs_iface->emit_store_output(bld->tcs_iface, &bld_base->base, 0,
622
 
                                        indir_vertex_index ? true : false,
623
 
                                        indir_vertex_index,
624
 
                                        indir_index ? true : false,
625
 
                                        attrib_index_val,
626
 
                                        false, swizzle_index_val2,
627
 
                                        split_vals[1], exec_mask);
628
 
   } else {
629
 
      chan_val = LLVMBuildBitCast(builder, chan_val, bld_base->base.vec_type, "");
630
 
      bld->tcs_iface->emit_store_output(bld->tcs_iface, &bld_base->base, 0,
631
 
                                        indir_vertex_index ? true : false,
632
 
                                        indir_vertex_index,
633
 
                                        indir_index && !is_compact ? true : false,
634
 
                                        attrib_index_val,
635
 
                                        indir_index && is_compact ? true : false,
636
 
                                        swizzle_index_val,
637
 
                                        chan_val, exec_mask);
638
 
   }
639
 
}
640
 
 
641
 
static void emit_store_var(struct lp_build_nir_context *bld_base,
642
 
                           nir_variable_mode deref_mode,
643
 
                           unsigned num_components,
644
 
                           unsigned bit_size,
645
 
                           nir_variable *var,
646
 
                           unsigned writemask,
647
 
                           LLVMValueRef indir_vertex_index,
648
 
                           unsigned const_index,
649
 
                           LLVMValueRef indir_index,
650
 
                           LLVMValueRef dst)
651
 
{
652
 
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
653
 
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
654
 
   switch (deref_mode) {
655
 
   case nir_var_shader_out: {
656
 
      unsigned location = var->data.driver_location;
657
 
      unsigned comp = var->data.location_frac;
658
 
      if (bld_base->shader->info.stage == MESA_SHADER_FRAGMENT) {
659
 
         if (var->data.location == FRAG_RESULT_STENCIL)
660
 
            comp = 1;
661
 
         else if (var->data.location == FRAG_RESULT_DEPTH)
662
 
            comp = 2;
663
 
      }
664
 
 
665
 
      if (var->data.compact) {
666
 
         location += const_index / 4;
667
 
         comp += const_index % 4;
668
 
         const_index = 0;
669
 
      }
670
 
 
671
 
      for (unsigned chan = 0; chan < num_components; chan++) {
672
 
         if (writemask & (1u << chan)) {
673
 
            LLVMValueRef chan_val = (num_components == 1) ? dst : LLVMBuildExtractValue(builder, dst, chan, "");
674
 
            if (bld->tcs_iface) {
675
 
               emit_store_tcs_chan(bld_base, var->data.compact, bit_size, location, const_index, indir_vertex_index, indir_index, comp, chan, chan_val);
676
 
            } else
677
 
               emit_store_chan(bld_base, deref_mode, bit_size, location + const_index, comp, chan, chan_val);
678
 
         }
679
 
      }
680
 
      break;
681
 
   }
682
 
   default:
683
 
      break;
684
 
   }
685
 
}
686
 
 
687
 
static LLVMValueRef emit_load_reg(struct lp_build_nir_context *bld_base,
688
 
                                  struct lp_build_context *reg_bld,
689
 
                                  const nir_reg_src *reg,
690
 
                                  LLVMValueRef indir_src,
691
 
                                  LLVMValueRef reg_storage)
692
 
{
693
 
   struct gallivm_state *gallivm = bld_base->base.gallivm;
694
 
   LLVMBuilderRef builder = gallivm->builder;
695
 
   int nc = reg->reg->num_components;
696
 
   LLVMValueRef vals[NIR_MAX_VEC_COMPONENTS] = { NULL };
697
 
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
698
 
   if (reg->reg->num_array_elems) {
699
 
      LLVMValueRef indirect_val = lp_build_const_int_vec(gallivm, uint_bld->type, reg->base_offset);
700
 
      if (reg->indirect) {
701
 
         LLVMValueRef max_index = lp_build_const_int_vec(gallivm, uint_bld->type, reg->reg->num_array_elems - 1);
702
 
         indirect_val = LLVMBuildAdd(builder, indirect_val, indir_src, "");
703
 
         indirect_val = lp_build_min(uint_bld, indirect_val, max_index);
704
 
      }
705
 
      reg_storage = LLVMBuildBitCast(builder, reg_storage, LLVMPointerType(reg_bld->elem_type, 0), "");
706
 
      for (unsigned i = 0; i < nc; i++) {
707
 
         LLVMValueRef indirect_offset = get_soa_array_offsets(uint_bld, indirect_val, nc, i, TRUE);
708
 
         vals[i] = build_gather(bld_base, reg_bld, reg_storage, indirect_offset, NULL, NULL);
709
 
      }
710
 
   } else {
711
 
      for (unsigned i = 0; i < nc; i++) {
712
 
         LLVMValueRef this_storage = nc == 1 ? reg_storage : lp_build_array_get_ptr(gallivm, reg_storage,
713
 
                                                                                    lp_build_const_int32(gallivm, i));
714
 
         vals[i] = LLVMBuildLoad(builder, this_storage, "");
715
 
      }
716
 
   }
717
 
   return nc == 1 ? vals[0] : lp_nir_array_build_gather_values(builder, vals, nc);
718
 
}
719
 
 
720
 
static void emit_store_reg(struct lp_build_nir_context *bld_base,
721
 
                           struct lp_build_context *reg_bld,
722
 
                           const nir_reg_dest *reg,
723
 
                           unsigned writemask,
724
 
                           LLVMValueRef indir_src,
725
 
                           LLVMValueRef reg_storage,
726
 
                           LLVMValueRef dst[NIR_MAX_VEC_COMPONENTS])
727
 
{
728
 
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
729
 
   struct gallivm_state *gallivm = bld_base->base.gallivm;
730
 
   LLVMBuilderRef builder = gallivm->builder;
731
 
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
732
 
   int nc = reg->reg->num_components;
733
 
   if (reg->reg->num_array_elems > 0) {
734
 
      LLVMValueRef indirect_val = lp_build_const_int_vec(gallivm, uint_bld->type, reg->base_offset);
735
 
      if (reg->indirect) {
736
 
         LLVMValueRef max_index = lp_build_const_int_vec(gallivm, uint_bld->type, reg->reg->num_array_elems - 1);
737
 
         indirect_val = LLVMBuildAdd(builder, indirect_val, indir_src, "");
738
 
         indirect_val = lp_build_min(uint_bld, indirect_val, max_index);
739
 
      }
740
 
      reg_storage = LLVMBuildBitCast(builder, reg_storage, LLVMPointerType(reg_bld->elem_type, 0), "");
741
 
      for (unsigned i = 0; i < nc; i++) {
742
 
         if (!(writemask & (1 << i)))
743
 
            continue;
744
 
         LLVMValueRef indirect_offset = get_soa_array_offsets(uint_bld, indirect_val, nc, i, TRUE);
745
 
         dst[i] = LLVMBuildBitCast(builder, dst[i], reg_bld->vec_type, "");
746
 
         emit_mask_scatter(bld, reg_storage, indirect_offset, dst[i], &bld->exec_mask);
747
 
      }
748
 
      return;
749
 
   }
750
 
 
751
 
   for (unsigned i = 0; i < nc; i++) {
752
 
      LLVMValueRef this_storage = nc == 1 ? reg_storage : lp_build_array_get_ptr(gallivm, reg_storage,
753
 
                                                         lp_build_const_int32(gallivm, i));
754
 
      dst[i] = LLVMBuildBitCast(builder, dst[i], reg_bld->vec_type, "");
755
 
      lp_exec_mask_store(&bld->exec_mask, reg_bld, dst[i], this_storage);
756
 
   }
757
 
}
758
 
 
759
 
static void emit_load_kernel_arg(struct lp_build_nir_context *bld_base,
760
 
                                 unsigned nc,
761
 
                                 unsigned bit_size,
762
 
                                 unsigned offset_bit_size,
763
 
                                 bool offset_is_uniform,
764
 
                                 LLVMValueRef offset,
765
 
                                 LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
766
 
{
767
 
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
768
 
   struct gallivm_state *gallivm = bld_base->base.gallivm;
769
 
   LLVMBuilderRef builder = gallivm->builder;
770
 
   struct lp_build_context *bld_broad = get_int_bld(bld_base, true, bit_size);
771
 
   LLVMValueRef kernel_args_ptr = bld->kernel_args_ptr;
772
 
   unsigned size_shift = bit_size_to_shift_size(bit_size);
773
 
   struct lp_build_context *bld_offset = get_int_bld(bld_base, true, offset_bit_size);
774
 
   if (size_shift)
775
 
      offset = lp_build_shr(bld_offset, offset, lp_build_const_int_vec(gallivm, bld_offset->type, size_shift));
776
 
 
777
 
   LLVMTypeRef ptr_type = LLVMPointerType(bld_broad->elem_type, 0);
778
 
   kernel_args_ptr = LLVMBuildBitCast(builder, kernel_args_ptr, ptr_type, "");
779
 
 
780
 
   if (!invocation_0_must_be_active(bld_base)) {
781
 
      mesa_logw_once("Treating load_kernel_arg in control flow as uniform, results may be incorrect.");
782
 
   }
783
 
 
784
 
   if (offset_is_uniform) {
785
 
      offset = LLVMBuildExtractElement(builder, offset, lp_build_const_int32(gallivm, 0), "");
786
 
 
787
 
      for (unsigned c = 0; c < nc; c++) {
788
 
         LLVMValueRef this_offset = LLVMBuildAdd(builder, offset, offset_bit_size == 64 ? lp_build_const_int64(gallivm, c) : lp_build_const_int32(gallivm, c), "");
789
 
 
790
 
         LLVMValueRef scalar = lp_build_pointer_get(builder, kernel_args_ptr, this_offset);
791
 
         result[c] = lp_build_broadcast_scalar(bld_broad, scalar);
792
 
      }
793
 
   }
794
 
}
795
 
 
796
 
static LLVMValueRef global_addr_to_ptr(struct gallivm_state *gallivm, LLVMValueRef addr_ptr, unsigned bit_size)
797
 
{
798
 
   LLVMBuilderRef builder = gallivm->builder;
799
 
   switch (bit_size) {
800
 
   case 8:
801
 
      addr_ptr = LLVMBuildIntToPtr(builder, addr_ptr, LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0), "");
802
 
      break;
803
 
   case 16:
804
 
      addr_ptr = LLVMBuildIntToPtr(builder, addr_ptr, LLVMPointerType(LLVMInt16TypeInContext(gallivm->context), 0), "");
805
 
      break;
806
 
   case 32:
807
 
   default:
808
 
      addr_ptr = LLVMBuildIntToPtr(builder, addr_ptr, LLVMPointerType(LLVMInt32TypeInContext(gallivm->context), 0), "");
809
 
      break;
810
 
   case 64:
811
 
      addr_ptr = LLVMBuildIntToPtr(builder, addr_ptr, LLVMPointerType(LLVMInt64TypeInContext(gallivm->context), 0), "");
812
 
      break;
813
 
   }
814
 
   return addr_ptr;
815
 
}
816
 
 
817
 
static void emit_load_global(struct lp_build_nir_context *bld_base,
818
 
                             unsigned nc,
819
 
                             unsigned bit_size,
820
 
                             unsigned addr_bit_size,
821
 
                             bool offset_is_uniform,
822
 
                             LLVMValueRef addr,
823
 
                             LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS])
824
 
{
825
 
   struct gallivm_state *gallivm = bld_base->base.gallivm;
826
 
   LLVMBuilderRef builder = gallivm->builder;
827
 
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
828
 
   struct lp_build_context *res_bld;
829
 
   LLVMValueRef exec_mask = mask_vec(bld_base);
830
 
 
831
 
   res_bld = get_int_bld(bld_base, true, bit_size);
832
 
 
833
 
   if (offset_is_uniform && invocation_0_must_be_active(bld_base)) {
834
 
      /* If the offset is uniform, then use the address from invocation 0 to
835
 
       * load, and broadcast to all invocations.
836
 
       */
837
 
      LLVMValueRef addr_ptr = LLVMBuildExtractElement(gallivm->builder, addr,
838
 
                                                      lp_build_const_int32(gallivm, 0), "");
839
 
      addr_ptr = global_addr_to_ptr(gallivm, addr_ptr, bit_size);
840
 
 
841
 
      for (unsigned c = 0; c < nc; c++) {
842
 
         LLVMValueRef scalar = lp_build_pointer_get(builder, addr_ptr, lp_build_const_int32(gallivm, c));
843
 
         outval[c] = lp_build_broadcast_scalar(res_bld, scalar);
844
 
      }
845
 
      return;
846
 
   }
847
 
 
848
 
   for (unsigned c = 0; c < nc; c++) {
849
 
      LLVMValueRef result = lp_build_alloca(gallivm, res_bld->vec_type, "");
850
 
      struct lp_build_loop_state loop_state;
851
 
      lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
852
 
 
853
 
      struct lp_build_if_state ifthen;
854
 
      LLVMValueRef cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
855
 
      cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
856
 
      lp_build_if(&ifthen, gallivm, cond);
857
 
 
858
 
      LLVMValueRef addr_ptr = LLVMBuildExtractElement(gallivm->builder, addr,
859
 
                                                      loop_state.counter, "");
860
 
      addr_ptr = global_addr_to_ptr(gallivm, addr_ptr, bit_size);
861
 
 
862
 
      LLVMValueRef value_ptr = lp_build_pointer_get(builder, addr_ptr, lp_build_const_int32(gallivm, c));
863
 
 
864
 
      LLVMValueRef temp_res;
865
 
      temp_res = LLVMBuildLoad(builder, result, "");
866
 
      temp_res = LLVMBuildInsertElement(builder, temp_res, value_ptr, loop_state.counter, "");
867
 
      LLVMBuildStore(builder, temp_res, result);
868
 
      lp_build_endif(&ifthen);
869
 
      lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
870
 
                             NULL, LLVMIntUGE);
871
 
      outval[c] = LLVMBuildLoad(builder, result, "");
872
 
   }
873
 
}
874
 
 
875
 
static void emit_store_global(struct lp_build_nir_context *bld_base,
876
 
                              unsigned writemask,
877
 
                              unsigned nc, unsigned bit_size,
878
 
                              unsigned addr_bit_size,
879
 
                              LLVMValueRef addr,
880
 
                              LLVMValueRef dst)
881
 
{
882
 
   struct gallivm_state *gallivm = bld_base->base.gallivm;
883
 
   LLVMBuilderRef builder = gallivm->builder;
884
 
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
885
 
   LLVMValueRef exec_mask = mask_vec(bld_base);
886
 
 
887
 
   for (unsigned c = 0; c < nc; c++) {
888
 
      if (!(writemask & (1u << c)))
889
 
         continue;
890
 
      LLVMValueRef val = (nc == 1) ? dst : LLVMBuildExtractValue(builder, dst, c, "");
891
 
 
892
 
      struct lp_build_loop_state loop_state;
893
 
      lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
894
 
      LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, val,
895
 
                                                       loop_state.counter, "");
896
 
 
897
 
      LLVMValueRef addr_ptr = LLVMBuildExtractElement(gallivm->builder, addr,
898
 
                                                      loop_state.counter, "");
899
 
      addr_ptr = global_addr_to_ptr(gallivm, addr_ptr, bit_size);
900
 
      switch (bit_size) {
901
 
      case 8:
902
 
         value_ptr = LLVMBuildBitCast(builder, value_ptr, LLVMInt8TypeInContext(gallivm->context), "");
903
 
         break;
904
 
      case 16:
905
 
         value_ptr = LLVMBuildBitCast(builder, value_ptr, LLVMInt16TypeInContext(gallivm->context), "");
906
 
         break;
907
 
      case 32:
908
 
         value_ptr = LLVMBuildBitCast(builder, value_ptr, LLVMInt32TypeInContext(gallivm->context), "");
909
 
         break;
910
 
      case 64:
911
 
         value_ptr = LLVMBuildBitCast(builder, value_ptr, LLVMInt64TypeInContext(gallivm->context), "");
912
 
         break;
913
 
      default:
914
 
         break;
915
 
      }
916
 
      struct lp_build_if_state ifthen;
917
 
 
918
 
      LLVMValueRef cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
919
 
      cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
920
 
      lp_build_if(&ifthen, gallivm, cond);
921
 
      lp_build_pointer_set(builder, addr_ptr, lp_build_const_int32(gallivm, c), value_ptr);
922
 
      lp_build_endif(&ifthen);
923
 
      lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
924
 
                             NULL, LLVMIntUGE);
925
 
   }
926
 
}
927
 
 
928
 
static void emit_atomic_global(struct lp_build_nir_context *bld_base,
929
 
                               nir_intrinsic_op nir_op,
930
 
                               unsigned addr_bit_size,
931
 
                               unsigned val_bit_size,
932
 
                               LLVMValueRef addr,
933
 
                               LLVMValueRef val, LLVMValueRef val2,
934
 
                               LLVMValueRef *result)
935
 
{
936
 
   struct gallivm_state *gallivm = bld_base->base.gallivm;
937
 
   LLVMBuilderRef builder = gallivm->builder;
938
 
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
939
 
   struct lp_build_context *atom_bld = get_int_bld(bld_base, true, val_bit_size);
940
 
   LLVMValueRef atom_res = lp_build_alloca(gallivm,
941
 
                                           LLVMTypeOf(val), "");
942
 
   LLVMValueRef exec_mask = mask_vec(bld_base);
943
 
   struct lp_build_loop_state loop_state;
944
 
   lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
945
 
 
946
 
   LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, val,
947
 
                                                    loop_state.counter, "");
948
 
 
949
 
   LLVMValueRef addr_ptr = LLVMBuildExtractElement(gallivm->builder, addr,
950
 
                                                   loop_state.counter, "");
951
 
   addr_ptr = global_addr_to_ptr(gallivm, addr_ptr, 32);
952
 
   struct lp_build_if_state ifthen;
953
 
   LLVMValueRef cond, temp_res;
954
 
   LLVMValueRef scalar;
955
 
   cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
956
 
   cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
957
 
   lp_build_if(&ifthen, gallivm, cond);
958
 
 
959
 
   addr_ptr = LLVMBuildBitCast(gallivm->builder, addr_ptr, LLVMPointerType(LLVMTypeOf(value_ptr), 0), "");
960
 
   if (nir_op == nir_intrinsic_global_atomic_comp_swap) {
961
 
      LLVMValueRef cas_src_ptr = LLVMBuildExtractElement(gallivm->builder, val2,
962
 
                                                         loop_state.counter, "");
963
 
      cas_src_ptr = LLVMBuildBitCast(gallivm->builder, cas_src_ptr, atom_bld->elem_type, "");
964
 
      scalar = LLVMBuildAtomicCmpXchg(builder, addr_ptr, value_ptr,
965
 
                                      cas_src_ptr,
966
 
                                      LLVMAtomicOrderingSequentiallyConsistent,
967
 
                                      LLVMAtomicOrderingSequentiallyConsistent,
968
 
                                      false);
969
 
      scalar = LLVMBuildExtractValue(gallivm->builder, scalar, 0, "");
970
 
   } else {
971
 
      LLVMAtomicRMWBinOp op;
972
 
      switch (nir_op) {
973
 
      case nir_intrinsic_global_atomic_add:
974
 
         op = LLVMAtomicRMWBinOpAdd;
975
 
         break;
976
 
      case nir_intrinsic_global_atomic_exchange:
977
 
 
978
 
         op = LLVMAtomicRMWBinOpXchg;
979
 
         break;
980
 
      case nir_intrinsic_global_atomic_and:
981
 
         op = LLVMAtomicRMWBinOpAnd;
982
 
         break;
983
 
      case nir_intrinsic_global_atomic_or:
984
 
         op = LLVMAtomicRMWBinOpOr;
985
 
         break;
986
 
      case nir_intrinsic_global_atomic_xor:
987
 
         op = LLVMAtomicRMWBinOpXor;
988
 
         break;
989
 
      case nir_intrinsic_global_atomic_umin:
990
 
         op = LLVMAtomicRMWBinOpUMin;
991
 
         break;
992
 
      case nir_intrinsic_global_atomic_umax:
993
 
         op = LLVMAtomicRMWBinOpUMax;
994
 
         break;
995
 
      case nir_intrinsic_global_atomic_imin:
996
 
         op = LLVMAtomicRMWBinOpMin;
997
 
         break;
998
 
      case nir_intrinsic_global_atomic_imax:
999
 
         op = LLVMAtomicRMWBinOpMax;
1000
 
         break;
1001
 
      default:
1002
 
         unreachable("unknown atomic op");
1003
 
      }
1004
 
 
1005
 
      scalar = LLVMBuildAtomicRMW(builder, op,
1006
 
                                  addr_ptr, value_ptr,
1007
 
                                  LLVMAtomicOrderingSequentiallyConsistent,
1008
 
                                  false);
1009
 
   }
1010
 
   temp_res = LLVMBuildLoad(builder, atom_res, "");
1011
 
   temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, loop_state.counter, "");
1012
 
   LLVMBuildStore(builder, temp_res, atom_res);
1013
 
   lp_build_else(&ifthen);
1014
 
   temp_res = LLVMBuildLoad(builder, atom_res, "");
1015
 
   bool is_float = LLVMTypeOf(val) == bld_base->base.vec_type;
1016
 
   LLVMValueRef zero_val;
1017
 
   if (is_float) {
1018
 
      if (val_bit_size == 64)
1019
 
         zero_val = lp_build_const_double(gallivm, 0);
1020
 
      else
1021
 
         zero_val = lp_build_const_float(gallivm, 0);
1022
 
   } else {
1023
 
      if (val_bit_size == 64)
1024
 
         zero_val = lp_build_const_int64(gallivm, 0);
1025
 
      else
1026
 
         zero_val = lp_build_const_int32(gallivm, 0);
1027
 
   }
1028
 
 
1029
 
   temp_res = LLVMBuildInsertElement(builder, temp_res, zero_val, loop_state.counter, "");
1030
 
   LLVMBuildStore(builder, temp_res, atom_res);
1031
 
   lp_build_endif(&ifthen);
1032
 
   lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
1033
 
                          NULL, LLVMIntUGE);
1034
 
   *result = LLVMBuildLoad(builder, atom_res, "");
1035
 
}
1036
 
 
1037
 
static void emit_load_ubo(struct lp_build_nir_context *bld_base,
1038
 
                          unsigned nc,
1039
 
                          unsigned bit_size,
1040
 
                          bool offset_is_uniform,
1041
 
                          LLVMValueRef index,
1042
 
                          LLVMValueRef offset,
1043
 
                          LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
1044
 
{
1045
 
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1046
 
   struct gallivm_state *gallivm = bld_base->base.gallivm;
1047
 
   LLVMBuilderRef builder = gallivm->builder;
1048
 
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
1049
 
   struct lp_build_context *bld_broad = get_int_bld(bld_base, true, bit_size);
1050
 
   LLVMValueRef consts_ptr = lp_build_array_get(gallivm, bld->consts_ptr, index);
1051
 
   LLVMValueRef num_consts = lp_build_array_get(gallivm, bld->const_sizes_ptr, index);
1052
 
   unsigned size_shift = bit_size_to_shift_size(bit_size);
1053
 
   if (size_shift)
1054
 
      offset = lp_build_shr(uint_bld, offset, lp_build_const_int_vec(gallivm, uint_bld->type, size_shift));
1055
 
 
1056
 
   LLVMTypeRef ptr_type = LLVMPointerType(bld_broad->elem_type, 0);
1057
 
   consts_ptr = LLVMBuildBitCast(builder, consts_ptr, ptr_type, "");
1058
 
 
1059
 
   if (offset_is_uniform && invocation_0_must_be_active(bld_base)) {
1060
 
      offset = LLVMBuildExtractElement(builder, offset, lp_build_const_int32(gallivm, 0), "");
1061
 
      struct lp_build_context *load_bld = get_int_bld(bld_base, true, bit_size);
1062
 
      switch (bit_size) {
1063
 
      case 8:
1064
 
         num_consts = LLVMBuildShl(gallivm->builder, num_consts, lp_build_const_int32(gallivm, 2), "");
1065
 
         break;
1066
 
      case 16:
1067
 
         num_consts = LLVMBuildShl(gallivm->builder, num_consts, lp_build_const_int32(gallivm, 1), "");
1068
 
         break;
1069
 
      case 64:
1070
 
         num_consts = LLVMBuildLShr(gallivm->builder, num_consts, lp_build_const_int32(gallivm, 1), "");
1071
 
         break;
1072
 
      default: break;
1073
 
      }
1074
 
      for (unsigned c = 0; c < nc; c++) {
1075
 
         LLVMValueRef chan_offset = LLVMBuildAdd(builder, offset, lp_build_const_int32(gallivm, c), "");
1076
 
 
1077
 
         LLVMValueRef scalar;
1078
 
         /* If loading outside the UBO, we need to skip the load and read 0 instead. */
1079
 
         LLVMValueRef zero = lp_build_zero_bits(gallivm, bit_size);
1080
 
         LLVMValueRef res_store = lp_build_alloca(gallivm, LLVMTypeOf(zero), "");
1081
 
         LLVMBuildStore(builder, zero, res_store);
1082
 
 
1083
 
         LLVMValueRef fetch_extent = LLVMBuildAdd(builder, chan_offset, lp_build_const_int32(gallivm, 1), "");
1084
 
         LLVMValueRef fetch_cond = LLVMBuildICmp(gallivm->builder, LLVMIntUGE, num_consts, fetch_extent, "");
1085
 
         LLVMValueRef fetch_cond2 = LLVMBuildICmp(gallivm->builder, LLVMIntSGE, chan_offset, lp_build_const_int32(gallivm, 0), "");
1086
 
         LLVMValueRef fetch_cond_final = LLVMBuildAnd(gallivm->builder, fetch_cond, fetch_cond2, "");
1087
 
         struct lp_build_if_state ifthen;
1088
 
         lp_build_if(&ifthen, gallivm, fetch_cond_final);
1089
 
         LLVMBuildStore(builder, lp_build_pointer_get(builder, consts_ptr, chan_offset), res_store);
1090
 
         lp_build_endif(&ifthen);
1091
 
 
1092
 
         scalar = LLVMBuildLoad(builder, res_store, "");
1093
 
 
1094
 
         result[c] = lp_build_broadcast_scalar(load_bld, scalar);
1095
 
      }
1096
 
   } else {
1097
 
      LLVMValueRef overflow_mask;
1098
 
 
1099
 
      num_consts = lp_build_broadcast_scalar(uint_bld, num_consts);
1100
 
      if (bit_size == 64)
1101
 
         num_consts = lp_build_shr_imm(uint_bld, num_consts, 1);
1102
 
      else if (bit_size == 16)
1103
 
         num_consts = lp_build_shl_imm(uint_bld, num_consts, 1);
1104
 
      else if (bit_size == 8)
1105
 
         num_consts = lp_build_shl_imm(uint_bld, num_consts, 2);
1106
 
 
1107
 
      for (unsigned c = 0; c < nc; c++) {
1108
 
         LLVMValueRef this_offset = lp_build_add(uint_bld, offset, lp_build_const_int_vec(gallivm, uint_bld->type, c));
1109
 
         overflow_mask = lp_build_compare(gallivm, uint_bld->type, PIPE_FUNC_GEQUAL,
1110
 
                                          this_offset, num_consts);
1111
 
         result[c] = build_gather(bld_base, bld_broad, consts_ptr, this_offset, overflow_mask, NULL);
1112
 
      }
1113
 
   }
1114
 
}
1115
 
 
1116
 
static void
1117
 
emit_load_const(struct lp_build_nir_context *bld_base,
1118
 
                const nir_load_const_instr *instr,
1119
 
                LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS])
1120
 
{
1121
 
   struct lp_build_context *int_bld = get_int_bld(bld_base, true, instr->def.bit_size);
1122
 
   for (unsigned i = 0; i < instr->def.num_components; i++)
1123
 
     outval[i] = lp_build_const_int_vec(bld_base->base.gallivm, int_bld->type, instr->def.bit_size == 32 ? instr->value[i].u32 : instr->value[i].u64);
1124
 
   memset(&outval[instr->def.num_components], 0, NIR_MAX_VEC_COMPONENTS - instr->def.num_components);
1125
 
}
1126
 
 
1127
 
/**
1128
 
 * Get the base address of SSBO[@index] for the @invocation channel, returning
1129
 
 * the address and also the bounds (in units of the bit_size).
1130
 
 */
1131
 
static LLVMValueRef
1132
 
ssbo_base_pointer(struct lp_build_nir_context *bld_base,
1133
 
                  unsigned bit_size,
1134
 
                  LLVMValueRef index, LLVMValueRef invocation, LLVMValueRef *bounds)
1135
 
{
1136
 
   struct gallivm_state *gallivm = bld_base->base.gallivm;
1137
 
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1138
 
   uint32_t shift_val = bit_size_to_shift_size(bit_size);
1139
 
 
1140
 
   LLVMValueRef ssbo_idx = LLVMBuildExtractElement(gallivm->builder, index, invocation, "");
1141
 
   LLVMValueRef ssbo_size_ptr = lp_build_array_get(gallivm, bld->ssbo_sizes_ptr, ssbo_idx);
1142
 
   LLVMValueRef ssbo_ptr = lp_build_array_get(gallivm, bld->ssbo_ptr, ssbo_idx);
1143
 
   if (bounds)
1144
 
      *bounds = LLVMBuildAShr(gallivm->builder, ssbo_size_ptr, lp_build_const_int32(gallivm, shift_val), "");
1145
 
 
1146
 
   return ssbo_ptr;
1147
 
}
1148
 
 
1149
 
static LLVMValueRef
1150
 
mem_access_base_pointer(struct lp_build_nir_context *bld_base,
1151
 
                        struct lp_build_context *mem_bld,
1152
 
                        unsigned bit_size,
1153
 
                        LLVMValueRef index, LLVMValueRef invocation, LLVMValueRef *bounds)
1154
 
{
1155
 
   struct gallivm_state *gallivm = bld_base->base.gallivm;
1156
 
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1157
 
   LLVMValueRef ptr;
1158
 
 
1159
 
   if (index) {
1160
 
      ptr = ssbo_base_pointer(bld_base, bit_size, index, invocation, bounds);
1161
 
   } else {
1162
 
      ptr = bld->shared_ptr;
1163
 
      *bounds = NULL;
1164
 
   }
1165
 
 
1166
 
   /* Cast it to the pointer type of the access this instruciton is doing. */
1167
 
   if (bit_size == 32)
1168
 
      return ptr;
1169
 
   else
1170
 
      return LLVMBuildBitCast(gallivm->builder, ptr, LLVMPointerType(mem_bld->elem_type, 0), "");
1171
 
}
1172
 
 
1173
 
static void emit_load_mem(struct lp_build_nir_context *bld_base,
1174
 
                          unsigned nc,
1175
 
                          unsigned bit_size,
1176
 
                          bool index_and_offset_are_uniform,
1177
 
                          LLVMValueRef index,
1178
 
                          LLVMValueRef offset,
1179
 
                          LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS])
1180
 
{
1181
 
   struct gallivm_state *gallivm = bld_base->base.gallivm;
1182
 
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1183
 
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
1184
 
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
1185
 
   struct lp_build_context *load_bld;
1186
 
   uint32_t shift_val = bit_size_to_shift_size(bit_size);
1187
 
 
1188
 
   load_bld = get_int_bld(bld_base, true, bit_size);
1189
 
 
1190
 
   offset = LLVMBuildAShr(gallivm->builder, offset, lp_build_const_int_vec(gallivm, uint_bld->type, shift_val), "");
1191
 
 
1192
 
   /* If the address is uniform, then use the address from invocation 0 to load,
1193
 
    * and broadcast to all invocations.
1194
 
    */
1195
 
   if (index_and_offset_are_uniform && invocation_0_must_be_active(bld_base)) {
1196
 
      LLVMValueRef ssbo_limit;
1197
 
      LLVMValueRef mem_ptr = mem_access_base_pointer(bld_base, load_bld, bit_size, index,
1198
 
                                                     lp_build_const_int32(gallivm, 0), &ssbo_limit);
1199
 
 
1200
 
      offset = LLVMBuildExtractElement(gallivm->builder, offset, lp_build_const_int32(gallivm, 0), "");
1201
 
 
1202
 
      for (unsigned c = 0; c < nc; c++) {
1203
 
         LLVMValueRef chan_offset = LLVMBuildAdd(builder, offset, lp_build_const_int32(gallivm, c), "");
1204
 
 
1205
 
         LLVMValueRef scalar;
1206
 
         /* If loading outside the SSBO, we need to skip the load and read 0 instead. */
1207
 
         if (ssbo_limit) {
1208
 
            LLVMValueRef zero = lp_build_zero_bits(gallivm, bit_size);
1209
 
            LLVMValueRef res_store = lp_build_alloca(gallivm, LLVMTypeOf(zero), "");
1210
 
            LLVMBuildStore(builder, zero, res_store);
1211
 
 
1212
 
            LLVMValueRef fetch_extent = LLVMBuildAdd(builder, chan_offset, lp_build_const_int32(gallivm, 1), "");
1213
 
            LLVMValueRef fetch_cond = LLVMBuildICmp(gallivm->builder, LLVMIntUGE, ssbo_limit, fetch_extent, "");
1214
 
            LLVMValueRef fetch_cond2 = LLVMBuildICmp(gallivm->builder, LLVMIntSGE, chan_offset, lp_build_const_int32(gallivm, 0), "");
1215
 
            LLVMValueRef fetch_cond_final = LLVMBuildAnd(gallivm->builder, fetch_cond, fetch_cond2, "");
1216
 
            struct lp_build_if_state ifthen;
1217
 
            lp_build_if(&ifthen, gallivm, fetch_cond_final);
1218
 
            LLVMBuildStore(builder, lp_build_pointer_get(builder, mem_ptr, chan_offset), res_store);
1219
 
            lp_build_endif(&ifthen);
1220
 
 
1221
 
            scalar = LLVMBuildLoad(builder, res_store, "");
1222
 
         } else {
1223
 
            scalar = lp_build_pointer_get(builder, mem_ptr, chan_offset);
1224
 
         }
1225
 
 
1226
 
         outval[c] = lp_build_broadcast_scalar(load_bld, scalar);
1227
 
      }
1228
 
      return;
1229
 
   }
1230
 
 
1231
 
   /* although the index is dynamically uniform that doesn't count if exec mask isn't set, so read the one-by-one */
1232
 
 
1233
 
   LLVMValueRef result[NIR_MAX_VEC_COMPONENTS];
1234
 
   for (unsigned c = 0; c < nc; c++)
1235
 
      result[c] = lp_build_alloca(gallivm, load_bld->vec_type, "");
1236
 
 
1237
 
   LLVMValueRef exec_mask = mask_vec(bld_base);
1238
 
   LLVMValueRef cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
1239
 
   struct lp_build_loop_state loop_state;
1240
 
   lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
1241
 
   LLVMValueRef loop_cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
1242
 
   LLVMValueRef loop_offset = LLVMBuildExtractElement(gallivm->builder, offset, loop_state.counter, "");
1243
 
 
1244
 
   struct lp_build_if_state exec_ifthen;
1245
 
   lp_build_if(&exec_ifthen, gallivm, loop_cond);
1246
 
 
1247
 
   LLVMValueRef ssbo_limit;
1248
 
   LLVMValueRef mem_ptr = mem_access_base_pointer(bld_base, load_bld, bit_size, index,
1249
 
                                                  loop_state.counter, &ssbo_limit);
1250
 
 
1251
 
   for (unsigned c = 0; c < nc; c++) {
1252
 
      LLVMValueRef loop_index = LLVMBuildAdd(builder, loop_offset, lp_build_const_int32(gallivm, c), "");
1253
 
      LLVMValueRef do_fetch = lp_build_const_int32(gallivm, -1);
1254
 
      if (ssbo_limit) {
1255
 
         LLVMValueRef ssbo_oob_cmp = lp_build_compare(gallivm, lp_elem_type(uint_bld->type), PIPE_FUNC_LESS, loop_index, ssbo_limit);
1256
 
         do_fetch = LLVMBuildAnd(builder, do_fetch, ssbo_oob_cmp, "");
1257
 
      }
1258
 
 
1259
 
      struct lp_build_if_state ifthen;
1260
 
      LLVMValueRef fetch_cond, temp_res;
1261
 
 
1262
 
      fetch_cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, do_fetch, lp_build_const_int32(gallivm, 0), "");
1263
 
 
1264
 
      lp_build_if(&ifthen, gallivm, fetch_cond);
1265
 
      LLVMValueRef scalar = lp_build_pointer_get(builder, mem_ptr, loop_index);
1266
 
 
1267
 
      temp_res = LLVMBuildLoad(builder, result[c], "");
1268
 
      temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, loop_state.counter, "");
1269
 
      LLVMBuildStore(builder, temp_res, result[c]);
1270
 
      lp_build_else(&ifthen);
1271
 
      temp_res = LLVMBuildLoad(builder, result[c], "");
1272
 
      LLVMValueRef zero = lp_build_zero_bits(gallivm, bit_size);
1273
 
      temp_res = LLVMBuildInsertElement(builder, temp_res, zero, loop_state.counter, "");
1274
 
      LLVMBuildStore(builder, temp_res, result[c]);
1275
 
      lp_build_endif(&ifthen);
1276
 
   }
1277
 
 
1278
 
   lp_build_endif(&exec_ifthen);
1279
 
   lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
1280
 
                          NULL, LLVMIntUGE);
1281
 
   for (unsigned c = 0; c < nc; c++)
1282
 
      outval[c] = LLVMBuildLoad(gallivm->builder, result[c], "");
1283
 
 
1284
 
}
1285
 
 
1286
 
static void emit_store_mem(struct lp_build_nir_context *bld_base,
1287
 
                           unsigned writemask,
1288
 
                           unsigned nc,
1289
 
                           unsigned bit_size,
1290
 
                           LLVMValueRef index,
1291
 
                           LLVMValueRef offset,
1292
 
                           LLVMValueRef dst)
1293
 
{
1294
 
   struct gallivm_state *gallivm = bld_base->base.gallivm;
1295
 
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1296
 
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
1297
 
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
1298
 
   struct lp_build_context *store_bld;
1299
 
   uint32_t shift_val = bit_size_to_shift_size(bit_size);
1300
 
   store_bld = get_int_bld(bld_base, true, bit_size);
1301
 
 
1302
 
   offset = lp_build_shr_imm(uint_bld, offset, shift_val);
1303
 
 
1304
 
   LLVMValueRef exec_mask = mask_vec(bld_base);
1305
 
   LLVMValueRef cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
1306
 
   struct lp_build_loop_state loop_state;
1307
 
   lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
1308
 
   LLVMValueRef loop_cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
1309
 
   LLVMValueRef loop_offset = LLVMBuildExtractElement(gallivm->builder, offset, loop_state.counter, "");
1310
 
 
1311
 
   struct lp_build_if_state exec_ifthen;
1312
 
   lp_build_if(&exec_ifthen, gallivm, loop_cond);
1313
 
 
1314
 
   LLVMValueRef ssbo_limit;
1315
 
   LLVMValueRef mem_ptr = mem_access_base_pointer(bld_base, store_bld, bit_size, index,
1316
 
                                                  loop_state.counter, &ssbo_limit);
1317
 
 
1318
 
   for (unsigned c = 0; c < nc; c++) {
1319
 
      if (!(writemask & (1u << c)))
1320
 
         continue;
1321
 
      LLVMValueRef loop_index = LLVMBuildAdd(builder, loop_offset, lp_build_const_int32(gallivm, c), "");
1322
 
      LLVMValueRef val = (nc == 1) ? dst : LLVMBuildExtractValue(builder, dst, c, "");
1323
 
      LLVMValueRef do_store = lp_build_const_int32(gallivm, -1);
1324
 
 
1325
 
      if (ssbo_limit) {
1326
 
         LLVMValueRef ssbo_oob_cmp = lp_build_compare(gallivm, lp_elem_type(uint_bld->type), PIPE_FUNC_LESS, loop_index, ssbo_limit);
1327
 
         do_store = LLVMBuildAnd(builder, do_store, ssbo_oob_cmp, "");
1328
 
      }
1329
 
 
1330
 
      LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, val,
1331
 
                                                       loop_state.counter, "");
1332
 
      value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, store_bld->elem_type, "");
1333
 
      struct lp_build_if_state ifthen;
1334
 
      LLVMValueRef store_cond;
1335
 
 
1336
 
      store_cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, do_store, lp_build_const_int32(gallivm, 0), "");
1337
 
      lp_build_if(&ifthen, gallivm, store_cond);
1338
 
      lp_build_pointer_set(builder, mem_ptr, loop_index, value_ptr);
1339
 
      lp_build_endif(&ifthen);
1340
 
   }
1341
 
 
1342
 
   lp_build_endif(&exec_ifthen);
1343
 
   lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
1344
 
                             NULL, LLVMIntUGE);
1345
 
 
1346
 
}
1347
 
 
1348
 
static void emit_atomic_mem(struct lp_build_nir_context *bld_base,
1349
 
                            nir_intrinsic_op nir_op,
1350
 
                            uint32_t bit_size,
1351
 
                            LLVMValueRef index, LLVMValueRef offset,
1352
 
                            LLVMValueRef val, LLVMValueRef val2,
1353
 
                            LLVMValueRef *result)
1354
 
{
1355
 
   struct gallivm_state *gallivm = bld_base->base.gallivm;
1356
 
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1357
 
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
1358
 
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
1359
 
   uint32_t shift_val = bit_size_to_shift_size(bit_size);
1360
 
   struct lp_build_context *atomic_bld = get_int_bld(bld_base, true, bit_size);
1361
 
 
1362
 
   offset = lp_build_shr_imm(uint_bld, offset, shift_val);
1363
 
   LLVMValueRef atom_res = lp_build_alloca(gallivm,
1364
 
                                           atomic_bld->vec_type, "");
1365
 
 
1366
 
   LLVMValueRef exec_mask = mask_vec(bld_base);
1367
 
   LLVMValueRef cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
1368
 
   struct lp_build_loop_state loop_state;
1369
 
   lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
1370
 
   LLVMValueRef loop_cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
1371
 
   LLVMValueRef loop_offset = LLVMBuildExtractElement(gallivm->builder, offset, loop_state.counter, "");
1372
 
 
1373
 
   struct lp_build_if_state exec_ifthen;
1374
 
   lp_build_if(&exec_ifthen, gallivm, loop_cond);
1375
 
 
1376
 
   LLVMValueRef ssbo_limit;
1377
 
   LLVMValueRef mem_ptr = mem_access_base_pointer(bld_base, atomic_bld, bit_size, index,
1378
 
                                                  loop_state.counter, &ssbo_limit);
1379
 
 
1380
 
   LLVMValueRef do_fetch = lp_build_const_int32(gallivm, -1);
1381
 
   if (ssbo_limit) {
1382
 
      LLVMValueRef ssbo_oob_cmp = lp_build_compare(gallivm, lp_elem_type(uint_bld->type), PIPE_FUNC_LESS, loop_offset, ssbo_limit);
1383
 
      do_fetch = LLVMBuildAnd(builder, do_fetch, ssbo_oob_cmp, "");
1384
 
   }
1385
 
 
1386
 
   LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, val,
1387
 
                                                    loop_state.counter, "");
1388
 
   value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, atomic_bld->elem_type, "");
1389
 
 
1390
 
   LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, mem_ptr, &loop_offset, 1, "");
1391
 
 
1392
 
   struct lp_build_if_state ifthen;
1393
 
   LLVMValueRef inner_cond, temp_res;
1394
 
   LLVMValueRef scalar;
1395
 
 
1396
 
   inner_cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, do_fetch, lp_build_const_int32(gallivm, 0), "");
1397
 
   lp_build_if(&ifthen, gallivm, inner_cond);
1398
 
 
1399
 
   if (nir_op == nir_intrinsic_ssbo_atomic_comp_swap || nir_op == nir_intrinsic_shared_atomic_comp_swap) {
1400
 
      LLVMValueRef cas_src_ptr = LLVMBuildExtractElement(gallivm->builder, val2,
1401
 
                                                         loop_state.counter, "");
1402
 
      cas_src_ptr = LLVMBuildBitCast(gallivm->builder, cas_src_ptr, atomic_bld->elem_type, "");
1403
 
      scalar = LLVMBuildAtomicCmpXchg(builder, scalar_ptr, value_ptr,
1404
 
                                      cas_src_ptr,
1405
 
                                      LLVMAtomicOrderingSequentiallyConsistent,
1406
 
                                      LLVMAtomicOrderingSequentiallyConsistent,
1407
 
                                      false);
1408
 
      scalar = LLVMBuildExtractValue(gallivm->builder, scalar, 0, "");
1409
 
   } else {
1410
 
      LLVMAtomicRMWBinOp op;
1411
 
 
1412
 
      switch (nir_op) {
1413
 
      case nir_intrinsic_shared_atomic_add:
1414
 
      case nir_intrinsic_ssbo_atomic_add:
1415
 
         op = LLVMAtomicRMWBinOpAdd;
1416
 
         break;
1417
 
      case nir_intrinsic_shared_atomic_exchange:
1418
 
      case nir_intrinsic_ssbo_atomic_exchange:
1419
 
         op = LLVMAtomicRMWBinOpXchg;
1420
 
         break;
1421
 
      case nir_intrinsic_shared_atomic_and:
1422
 
      case nir_intrinsic_ssbo_atomic_and:
1423
 
         op = LLVMAtomicRMWBinOpAnd;
1424
 
         break;
1425
 
      case nir_intrinsic_shared_atomic_or:
1426
 
      case nir_intrinsic_ssbo_atomic_or:
1427
 
         op = LLVMAtomicRMWBinOpOr;
1428
 
         break;
1429
 
      case nir_intrinsic_shared_atomic_xor:
1430
 
      case nir_intrinsic_ssbo_atomic_xor:
1431
 
         op = LLVMAtomicRMWBinOpXor;
1432
 
         break;
1433
 
      case nir_intrinsic_shared_atomic_umin:
1434
 
      case nir_intrinsic_ssbo_atomic_umin:
1435
 
         op = LLVMAtomicRMWBinOpUMin;
1436
 
         break;
1437
 
      case nir_intrinsic_shared_atomic_umax:
1438
 
      case nir_intrinsic_ssbo_atomic_umax:
1439
 
         op = LLVMAtomicRMWBinOpUMax;
1440
 
         break;
1441
 
      case nir_intrinsic_ssbo_atomic_imin:
1442
 
      case nir_intrinsic_shared_atomic_imin:
1443
 
         op = LLVMAtomicRMWBinOpMin;
1444
 
         break;
1445
 
      case nir_intrinsic_ssbo_atomic_imax:
1446
 
      case nir_intrinsic_shared_atomic_imax:
1447
 
         op = LLVMAtomicRMWBinOpMax;
1448
 
         break;
1449
 
      default:
1450
 
         unreachable("unknown atomic op");
1451
 
      }
1452
 
      scalar = LLVMBuildAtomicRMW(builder, op,
1453
 
                                  scalar_ptr, value_ptr,
1454
 
                                  LLVMAtomicOrderingSequentiallyConsistent,
1455
 
                                  false);
1456
 
   }
1457
 
   temp_res = LLVMBuildLoad(builder, atom_res, "");
1458
 
   temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, loop_state.counter, "");
1459
 
   LLVMBuildStore(builder, temp_res, atom_res);
1460
 
   lp_build_else(&ifthen);
1461
 
   temp_res = LLVMBuildLoad(builder, atom_res, "");
1462
 
   LLVMValueRef zero = lp_build_zero_bits(gallivm, bit_size);
1463
 
   temp_res = LLVMBuildInsertElement(builder, temp_res, zero, loop_state.counter, "");
1464
 
   LLVMBuildStore(builder, temp_res, atom_res);
1465
 
   lp_build_endif(&ifthen);
1466
 
 
1467
 
   lp_build_endif(&exec_ifthen);
1468
 
   lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
1469
 
                          NULL, LLVMIntUGE);
1470
 
   *result = LLVMBuildLoad(builder, atom_res, "");
1471
 
}
1472
 
 
1473
 
static void emit_barrier(struct lp_build_nir_context *bld_base)
1474
 
{
1475
 
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1476
 
   struct gallivm_state * gallivm = bld_base->base.gallivm;
1477
 
 
1478
 
   LLVMBasicBlockRef resume = lp_build_insert_new_block(gallivm, "resume");
1479
 
 
1480
 
   lp_build_coro_suspend_switch(gallivm, bld->coro, resume, false);
1481
 
   LLVMPositionBuilderAtEnd(gallivm->builder, resume);
1482
 
}
1483
 
 
1484
 
static LLVMValueRef emit_get_ssbo_size(struct lp_build_nir_context *bld_base,
1485
 
                                       LLVMValueRef index)
1486
 
{
1487
 
   struct gallivm_state *gallivm = bld_base->base.gallivm;
1488
 
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1489
 
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
1490
 
   struct lp_build_context *bld_broad = &bld_base->uint_bld;
1491
 
   LLVMValueRef size_ptr = lp_build_array_get(bld_base->base.gallivm, bld->ssbo_sizes_ptr,
1492
 
                                              LLVMBuildExtractElement(builder, index, lp_build_const_int32(gallivm, 0), ""));
1493
 
   return lp_build_broadcast_scalar(bld_broad, size_ptr);
1494
 
}
1495
 
 
1496
 
static void emit_image_op(struct lp_build_nir_context *bld_base,
1497
 
                          struct lp_img_params *params)
1498
 
{
1499
 
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1500
 
   struct gallivm_state *gallivm = bld_base->base.gallivm;
1501
 
 
1502
 
   params->type = bld_base->base.type;
1503
 
   params->context_ptr = bld->context_ptr;
1504
 
   params->thread_data_ptr = bld->thread_data_ptr;
1505
 
   params->exec_mask = mask_vec(bld_base);
1506
 
 
1507
 
   if (params->image_index_offset)
1508
 
      params->image_index_offset = LLVMBuildExtractElement(gallivm->builder, params->image_index_offset,
1509
 
                                                           lp_build_const_int32(gallivm, 0), "");
1510
 
 
1511
 
   bld->image->emit_op(bld->image,
1512
 
                       bld->bld_base.base.gallivm,
1513
 
                       params);
1514
 
 
1515
 
}
1516
 
 
1517
 
static void emit_image_size(struct lp_build_nir_context *bld_base,
1518
 
                            struct lp_sampler_size_query_params *params)
1519
 
{
1520
 
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1521
 
   struct gallivm_state *gallivm = bld_base->base.gallivm;
1522
 
 
1523
 
   params->int_type = bld_base->int_bld.type;
1524
 
   params->context_ptr = bld->context_ptr;
1525
 
 
1526
 
   if (params->texture_unit_offset)
1527
 
      params->texture_unit_offset = LLVMBuildExtractElement(gallivm->builder, params->texture_unit_offset,
1528
 
                                                            lp_build_const_int32(gallivm, 0), "");
1529
 
   bld->image->emit_size_query(bld->image,
1530
 
                               bld->bld_base.base.gallivm,
1531
 
                               params);
1532
 
 
1533
 
}
1534
 
 
1535
 
static void init_var_slots(struct lp_build_nir_context *bld_base,
1536
 
                           nir_variable *var, unsigned sc)
1537
 
{
1538
 
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1539
 
   unsigned slots = glsl_count_attribute_slots(var->type, false) * 4;
1540
 
 
1541
 
   if (!bld->outputs)
1542
 
     return;
1543
 
   for (unsigned comp = sc; comp < slots + sc; comp++) {
1544
 
      unsigned this_loc = var->data.driver_location + (comp / 4);
1545
 
      unsigned this_chan = comp % 4;
1546
 
 
1547
 
      if (!bld->outputs[this_loc][this_chan])
1548
 
         bld->outputs[this_loc][this_chan] = lp_build_alloca(bld_base->base.gallivm,
1549
 
                                                             bld_base->base.vec_type, "output");
1550
 
   }
1551
 
}
1552
 
 
1553
 
static void emit_var_decl(struct lp_build_nir_context *bld_base,
1554
 
                          nir_variable *var)
1555
 
{
1556
 
   unsigned sc = var->data.location_frac;
1557
 
   switch (var->data.mode) {
1558
 
   case nir_var_shader_out: {
1559
 
      if (bld_base->shader->info.stage == MESA_SHADER_FRAGMENT) {
1560
 
         if (var->data.location == FRAG_RESULT_STENCIL)
1561
 
            sc = 1;
1562
 
         else if (var->data.location == FRAG_RESULT_DEPTH)
1563
 
            sc = 2;
1564
 
      }
1565
 
      init_var_slots(bld_base, var, sc);
1566
 
      break;
1567
 
   }
1568
 
   default:
1569
 
      break;
1570
 
   }
1571
 
}
1572
 
 
1573
 
static void emit_tex(struct lp_build_nir_context *bld_base,
1574
 
                     struct lp_sampler_params *params)
1575
 
{
1576
 
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1577
 
   struct gallivm_state *gallivm = bld_base->base.gallivm;
1578
 
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1579
 
 
1580
 
   params->type = bld_base->base.type;
1581
 
   params->context_ptr = bld->context_ptr;
1582
 
   params->thread_data_ptr = bld->thread_data_ptr;
1583
 
 
1584
 
   if (params->texture_index_offset && bld_base->shader->info.stage != MESA_SHADER_FRAGMENT) {
1585
 
      /* this is horrible but this can be dynamic */
1586
 
      LLVMValueRef coords[5];
1587
 
      LLVMValueRef *orig_texel_ptr;
1588
 
      struct lp_build_context *uint_bld = &bld_base->uint_bld;
1589
 
      LLVMValueRef result[4] = { LLVMGetUndef(bld_base->base.vec_type),
1590
 
                                 LLVMGetUndef(bld_base->base.vec_type),
1591
 
                                 LLVMGetUndef(bld_base->base.vec_type),
1592
 
                                 LLVMGetUndef(bld_base->base.vec_type) };
1593
 
      LLVMValueRef texel[4], orig_offset, orig_lod;
1594
 
      unsigned i;
1595
 
      orig_texel_ptr = params->texel;
1596
 
      orig_lod = params->lod;
1597
 
      for (i = 0; i < 5; i++) {
1598
 
         coords[i] = params->coords[i];
1599
 
      }
1600
 
      orig_offset = params->texture_index_offset;
1601
 
 
1602
 
      for (unsigned v = 0; v < uint_bld->type.length; v++) {
1603
 
         LLVMValueRef idx = lp_build_const_int32(gallivm, v);
1604
 
         LLVMValueRef new_coords[5];
1605
 
         for (i = 0; i < 5; i++) {
1606
 
            new_coords[i] = LLVMBuildExtractElement(gallivm->builder,
1607
 
                                                    coords[i], idx, "");
1608
 
         }
1609
 
         params->coords = new_coords;
1610
 
         params->texture_index_offset = LLVMBuildExtractElement(gallivm->builder,
1611
 
                                                                orig_offset,
1612
 
                                                                idx, "");
1613
 
         params->type = lp_elem_type(bld_base->base.type);
1614
 
 
1615
 
         if (orig_lod)
1616
 
            params->lod = LLVMBuildExtractElement(gallivm->builder, orig_lod, idx, "");
1617
 
         params->texel = texel;
1618
 
         bld->sampler->emit_tex_sample(bld->sampler,
1619
 
                                       gallivm,
1620
 
                                       params);
1621
 
 
1622
 
         for (i = 0; i < 4; i++) {
1623
 
            result[i] = LLVMBuildInsertElement(gallivm->builder, result[i], texel[i], idx, "");
1624
 
         }
1625
 
      }
1626
 
      for (i = 0; i < 4; i++) {
1627
 
         orig_texel_ptr[i] = result[i];
1628
 
      }
1629
 
      return;
1630
 
   }
1631
 
 
1632
 
   if (params->texture_index_offset) {
1633
 
      struct lp_build_loop_state loop_state;
1634
 
      LLVMValueRef exec_mask = mask_vec(bld_base);
1635
 
      LLVMValueRef outer_cond = LLVMBuildICmp(builder, LLVMIntNE, exec_mask, bld_base->uint_bld.zero, "");
1636
 
      LLVMValueRef res_store = lp_build_alloca(gallivm, bld_base->uint_bld.elem_type, "");
1637
 
      lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
1638
 
      LLVMValueRef if_cond = LLVMBuildExtractElement(gallivm->builder, outer_cond, loop_state.counter, "");
1639
 
 
1640
 
      struct lp_build_if_state ifthen;
1641
 
      lp_build_if(&ifthen, gallivm, if_cond);
1642
 
      LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, params->texture_index_offset,
1643
 
                                                       loop_state.counter, "");
1644
 
      LLVMBuildStore(builder, value_ptr, res_store);
1645
 
      lp_build_endif(&ifthen);
1646
 
      lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, bld_base->uint_bld.type.length),
1647
 
                             NULL, LLVMIntUGE);
1648
 
      LLVMValueRef idx_val = LLVMBuildLoad(builder, res_store, "");
1649
 
      params->texture_index_offset = idx_val;
1650
 
   }
1651
 
 
1652
 
   params->type = bld_base->base.type;
1653
 
   bld->sampler->emit_tex_sample(bld->sampler,
1654
 
                                 bld->bld_base.base.gallivm,
1655
 
                                 params);
1656
 
}
1657
 
 
1658
 
static void emit_tex_size(struct lp_build_nir_context *bld_base,
1659
 
                          struct lp_sampler_size_query_params *params)
1660
 
{
1661
 
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1662
 
 
1663
 
   params->int_type = bld_base->int_bld.type;
1664
 
   params->context_ptr = bld->context_ptr;
1665
 
 
1666
 
   if (params->texture_unit_offset)
1667
 
      params->texture_unit_offset = LLVMBuildExtractElement(bld_base->base.gallivm->builder,
1668
 
                                                             params->texture_unit_offset,
1669
 
                                                             lp_build_const_int32(bld_base->base.gallivm, 0), "");
1670
 
   bld->sampler->emit_size_query(bld->sampler,
1671
 
                                 bld->bld_base.base.gallivm,
1672
 
                                 params);
1673
 
}
1674
 
 
1675
 
static void emit_sysval_intrin(struct lp_build_nir_context *bld_base,
1676
 
                               nir_intrinsic_instr *instr,
1677
 
                               LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
1678
 
{
1679
 
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1680
 
   struct gallivm_state *gallivm = bld_base->base.gallivm;
1681
 
   struct lp_build_context *bld_broad = get_int_bld(bld_base, true, instr->dest.ssa.bit_size);
1682
 
   switch (instr->intrinsic) {
1683
 
   case nir_intrinsic_load_instance_id:
1684
 
      result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.instance_id);
1685
 
      break;
1686
 
   case nir_intrinsic_load_base_instance:
1687
 
      result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.base_instance);
1688
 
      break;
1689
 
   case nir_intrinsic_load_base_vertex:
1690
 
      result[0] = bld->system_values.basevertex;
1691
 
      break;
1692
 
   case nir_intrinsic_load_first_vertex:
1693
 
      result[0] = bld->system_values.firstvertex;
1694
 
      break;
1695
 
   case nir_intrinsic_load_vertex_id:
1696
 
      result[0] = bld->system_values.vertex_id;
1697
 
      break;
1698
 
   case nir_intrinsic_load_primitive_id:
1699
 
      result[0] = bld->system_values.prim_id;
1700
 
      break;
1701
 
   case nir_intrinsic_load_workgroup_id: {
1702
 
      LLVMValueRef tmp[3];
1703
 
      for (unsigned i = 0; i < 3; i++) {
1704
 
         tmp[i] = LLVMBuildExtractElement(gallivm->builder, bld->system_values.block_id, lp_build_const_int32(gallivm, i), "");
1705
 
         if (instr->dest.ssa.bit_size == 64)
1706
 
            tmp[i] = LLVMBuildZExt(gallivm->builder, tmp[i], bld_base->uint64_bld.elem_type, "");
1707
 
         result[i] = lp_build_broadcast_scalar(bld_broad, tmp[i]);
1708
 
      }
1709
 
      break;
1710
 
   }
1711
 
   case nir_intrinsic_load_local_invocation_id:
1712
 
      for (unsigned i = 0; i < 3; i++)
1713
 
         result[i] = LLVMBuildExtractValue(gallivm->builder, bld->system_values.thread_id, i, "");
1714
 
      break;
1715
 
   case nir_intrinsic_load_local_invocation_index: {
1716
 
      LLVMValueRef tmp, tmp2;
1717
 
      tmp = lp_build_broadcast_scalar(&bld_base->uint_bld, LLVMBuildExtractElement(gallivm->builder, bld->system_values.block_size, lp_build_const_int32(gallivm, 1), ""));
1718
 
      tmp2 = lp_build_broadcast_scalar(&bld_base->uint_bld, LLVMBuildExtractElement(gallivm->builder, bld->system_values.block_size, lp_build_const_int32(gallivm, 0), ""));
1719
 
      tmp = lp_build_mul(&bld_base->uint_bld, tmp, tmp2);
1720
 
      tmp = lp_build_mul(&bld_base->uint_bld, tmp, LLVMBuildExtractValue(gallivm->builder, bld->system_values.thread_id, 2, ""));
1721
 
 
1722
 
      tmp2 = lp_build_broadcast_scalar(&bld_base->uint_bld, LLVMBuildExtractElement(gallivm->builder, bld->system_values.block_size, lp_build_const_int32(gallivm, 0), ""));
1723
 
      tmp2 = lp_build_mul(&bld_base->uint_bld, tmp2, LLVMBuildExtractValue(gallivm->builder, bld->system_values.thread_id, 1, ""));
1724
 
      tmp = lp_build_add(&bld_base->uint_bld, tmp, tmp2);
1725
 
      tmp = lp_build_add(&bld_base->uint_bld, tmp, LLVMBuildExtractValue(gallivm->builder, bld->system_values.thread_id, 0, ""));
1726
 
      result[0] = tmp;
1727
 
      break;
1728
 
   }
1729
 
   case nir_intrinsic_load_num_workgroups: {
1730
 
      LLVMValueRef tmp[3];
1731
 
      for (unsigned i = 0; i < 3; i++) {
1732
 
         tmp[i] = LLVMBuildExtractElement(gallivm->builder, bld->system_values.grid_size, lp_build_const_int32(gallivm, i), "");
1733
 
         if (instr->dest.ssa.bit_size == 64)
1734
 
            tmp[i] = LLVMBuildZExt(gallivm->builder, tmp[i], bld_base->uint64_bld.elem_type, "");
1735
 
         result[i] = lp_build_broadcast_scalar(bld_broad, tmp[i]);
1736
 
      }
1737
 
      break;
1738
 
   }
1739
 
   case nir_intrinsic_load_invocation_id:
1740
 
      if (bld_base->shader->info.stage == MESA_SHADER_TESS_CTRL)
1741
 
         result[0] = bld->system_values.invocation_id;
1742
 
      else
1743
 
         result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.invocation_id);
1744
 
      break;
1745
 
   case nir_intrinsic_load_front_face:
1746
 
      result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.front_facing);
1747
 
      break;
1748
 
   case nir_intrinsic_load_draw_id:
1749
 
      result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.draw_id);
1750
 
      break;
1751
 
   default:
1752
 
      break;
1753
 
   case nir_intrinsic_load_workgroup_size:
1754
 
     for (unsigned i = 0; i < 3; i++)
1755
 
       result[i] = lp_build_broadcast_scalar(&bld_base->uint_bld, LLVMBuildExtractElement(gallivm->builder, bld->system_values.block_size, lp_build_const_int32(gallivm, i), ""));
1756
 
     break;
1757
 
   case nir_intrinsic_load_work_dim:
1758
 
      result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.work_dim);
1759
 
      break;
1760
 
   case nir_intrinsic_load_tess_coord:
1761
 
      for (unsigned i = 0; i < 3; i++) {
1762
 
         result[i] = LLVMBuildExtractValue(gallivm->builder, bld->system_values.tess_coord, i, "");
1763
 
      }
1764
 
      break;
1765
 
   case nir_intrinsic_load_tess_level_outer:
1766
 
      for (unsigned i = 0; i < 4; i++)
1767
 
         result[i] = lp_build_broadcast_scalar(&bld_base->base, LLVMBuildExtractValue(gallivm->builder, bld->system_values.tess_outer, i, ""));
1768
 
      break;
1769
 
   case nir_intrinsic_load_tess_level_inner:
1770
 
      for (unsigned i = 0; i < 2; i++)
1771
 
         result[i] = lp_build_broadcast_scalar(&bld_base->base, LLVMBuildExtractValue(gallivm->builder, bld->system_values.tess_inner, i, ""));
1772
 
      break;
1773
 
   case nir_intrinsic_load_patch_vertices_in:
1774
 
      result[0] = bld->system_values.vertices_in;
1775
 
      break;
1776
 
   case nir_intrinsic_load_sample_id:
1777
 
      result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.sample_id);
1778
 
      break;
1779
 
   case nir_intrinsic_load_sample_pos:
1780
 
      for (unsigned i = 0; i < 2; i++) {
1781
 
         LLVMValueRef idx = LLVMBuildMul(gallivm->builder, bld->system_values.sample_id, lp_build_const_int32(gallivm, 2), "");
1782
 
         idx = LLVMBuildAdd(gallivm->builder, idx, lp_build_const_int32(gallivm, i), "");
1783
 
         LLVMValueRef val = lp_build_array_get(gallivm, bld->system_values.sample_pos, idx);
1784
 
         result[i] = lp_build_broadcast_scalar(&bld_base->base, val);
1785
 
      }
1786
 
      break;
1787
 
   case nir_intrinsic_load_sample_mask_in:
1788
 
      result[0] = bld->system_values.sample_mask_in;
1789
 
      break;
1790
 
   case nir_intrinsic_load_view_index:
1791
 
      result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.view_index);
1792
 
      break;
1793
 
   case nir_intrinsic_load_subgroup_invocation: {
1794
 
      LLVMValueRef elems[LP_MAX_VECTOR_LENGTH];
1795
 
      for(unsigned i = 0; i < bld->bld_base.base.type.length; ++i)
1796
 
         elems[i] = lp_build_const_int32(gallivm, i);
1797
 
      result[0] = LLVMConstVector(elems, bld->bld_base.base.type.length);
1798
 
      break;
1799
 
   }
1800
 
   case nir_intrinsic_load_subgroup_id:
1801
 
      result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.subgroup_id);
1802
 
      break;
1803
 
   case nir_intrinsic_load_num_subgroups:
1804
 
      result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.num_subgroups);
1805
 
      break;
1806
 
   }
1807
 
}
1808
 
 
1809
 
static void emit_helper_invocation(struct lp_build_nir_context *bld_base,
1810
 
                                   LLVMValueRef *dst)
1811
 
{
1812
 
   struct gallivm_state *gallivm = bld_base->base.gallivm;
1813
 
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
1814
 
   *dst = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL, mask_vec(bld_base), lp_build_const_int_vec(gallivm, uint_bld->type, -1));
1815
 
}
1816
 
 
1817
 
static void bgnloop(struct lp_build_nir_context *bld_base)
1818
 
{
1819
 
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1820
 
   lp_exec_bgnloop(&bld->exec_mask, true);
1821
 
}
1822
 
 
1823
 
static void endloop(struct lp_build_nir_context *bld_base)
1824
 
{
1825
 
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1826
 
   lp_exec_endloop(bld_base->base.gallivm, &bld->exec_mask);
1827
 
}
1828
 
 
1829
 
static void if_cond(struct lp_build_nir_context *bld_base, LLVMValueRef cond)
1830
 
{
1831
 
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1832
 
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1833
 
   lp_exec_mask_cond_push(&bld->exec_mask, LLVMBuildBitCast(builder, cond, bld_base->base.int_vec_type, ""));
1834
 
}
1835
 
 
1836
 
static void else_stmt(struct lp_build_nir_context *bld_base)
1837
 
{
1838
 
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1839
 
   lp_exec_mask_cond_invert(&bld->exec_mask);
1840
 
}
1841
 
 
1842
 
static void endif_stmt(struct lp_build_nir_context *bld_base)
1843
 
{
1844
 
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1845
 
   lp_exec_mask_cond_pop(&bld->exec_mask);
1846
 
}
1847
 
 
1848
 
static void break_stmt(struct lp_build_nir_context *bld_base)
1849
 
{
1850
 
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1851
 
 
1852
 
   lp_exec_break(&bld->exec_mask, NULL, false);
1853
 
}
1854
 
 
1855
 
static void continue_stmt(struct lp_build_nir_context *bld_base)
1856
 
{
1857
 
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1858
 
   lp_exec_continue(&bld->exec_mask);
1859
 
}
1860
 
 
1861
 
static void discard(struct lp_build_nir_context *bld_base, LLVMValueRef cond)
1862
 
{
1863
 
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1864
 
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
1865
 
   LLVMValueRef mask;
1866
 
 
1867
 
   if (!cond) {
1868
 
      if (bld->exec_mask.has_mask) {
1869
 
         mask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
1870
 
      } else {
1871
 
         mask = LLVMConstNull(bld->bld_base.base.int_vec_type);
1872
 
      }
1873
 
   } else {
1874
 
      mask = LLVMBuildNot(builder, cond, "");
1875
 
      if (bld->exec_mask.has_mask) {
1876
 
         LLVMValueRef invmask;
1877
 
         invmask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
1878
 
         mask = LLVMBuildOr(builder, mask, invmask, "");
1879
 
      }
1880
 
   }
1881
 
   lp_build_mask_update(bld->mask, mask);
1882
 
}
1883
 
 
1884
 
static void
1885
 
increment_vec_ptr_by_mask(struct lp_build_nir_context * bld_base,
1886
 
                          LLVMValueRef ptr,
1887
 
                          LLVMValueRef mask)
1888
 
{
1889
 
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1890
 
   LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, "");
1891
 
 
1892
 
   current_vec = LLVMBuildSub(builder, current_vec, mask, "");
1893
 
 
1894
 
   LLVMBuildStore(builder, current_vec, ptr);
1895
 
}
1896
 
 
1897
 
static void
1898
 
clear_uint_vec_ptr_from_mask(struct lp_build_nir_context * bld_base,
1899
 
                             LLVMValueRef ptr,
1900
 
                             LLVMValueRef mask)
1901
 
{
1902
 
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1903
 
   LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, "");
1904
 
 
1905
 
   current_vec = lp_build_select(&bld_base->uint_bld,
1906
 
                                 mask,
1907
 
                                 bld_base->uint_bld.zero,
1908
 
                                 current_vec);
1909
 
 
1910
 
   LLVMBuildStore(builder, current_vec, ptr);
1911
 
}
1912
 
 
1913
 
static LLVMValueRef
1914
 
clamp_mask_to_max_output_vertices(struct lp_build_nir_soa_context * bld,
1915
 
                                  LLVMValueRef current_mask_vec,
1916
 
                                  LLVMValueRef total_emitted_vertices_vec)
1917
 
{
1918
 
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
1919
 
   struct lp_build_context *int_bld = &bld->bld_base.int_bld;
1920
 
   LLVMValueRef max_mask = lp_build_cmp(int_bld, PIPE_FUNC_LESS,
1921
 
                                            total_emitted_vertices_vec,
1922
 
                                            bld->max_output_vertices_vec);
1923
 
 
1924
 
   return LLVMBuildAnd(builder, current_mask_vec, max_mask, "");
1925
 
}
1926
 
 
1927
 
static void emit_vertex(struct lp_build_nir_context *bld_base, uint32_t stream_id)
1928
 
{
1929
 
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1930
 
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
1931
 
 
1932
 
   if (stream_id >= bld->gs_vertex_streams)
1933
 
      return;
1934
 
   assert(bld->gs_iface->emit_vertex);
1935
 
   LLVMValueRef total_emitted_vertices_vec =
1936
 
      LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr[stream_id], "");
1937
 
   LLVMValueRef mask = mask_vec(bld_base);
1938
 
   mask = clamp_mask_to_max_output_vertices(bld, mask,
1939
 
                                            total_emitted_vertices_vec);
1940
 
   bld->gs_iface->emit_vertex(bld->gs_iface, &bld->bld_base.base,
1941
 
                              bld->outputs,
1942
 
                              total_emitted_vertices_vec,
1943
 
                              mask,
1944
 
                              lp_build_const_int_vec(bld->bld_base.base.gallivm, bld->bld_base.base.type, stream_id));
1945
 
 
1946
 
   increment_vec_ptr_by_mask(bld_base, bld->emitted_vertices_vec_ptr[stream_id],
1947
 
                             mask);
1948
 
   increment_vec_ptr_by_mask(bld_base, bld->total_emitted_vertices_vec_ptr[stream_id],
1949
 
                             mask);
1950
 
}
1951
 
 
1952
 
static void
1953
 
end_primitive_masked(struct lp_build_nir_context * bld_base,
1954
 
                     LLVMValueRef mask, uint32_t stream_id)
1955
 
{
1956
 
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1957
 
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
1958
 
 
1959
 
   if (stream_id >= bld->gs_vertex_streams)
1960
 
      return;
1961
 
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
1962
 
   LLVMValueRef emitted_vertices_vec =
1963
 
      LLVMBuildLoad(builder, bld->emitted_vertices_vec_ptr[stream_id], "");
1964
 
   LLVMValueRef emitted_prims_vec =
1965
 
      LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr[stream_id], "");
1966
 
   LLVMValueRef total_emitted_vertices_vec =
1967
 
      LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr[stream_id], "");
1968
 
 
1969
 
   LLVMValueRef emitted_mask = lp_build_cmp(uint_bld,
1970
 
                                            PIPE_FUNC_NOTEQUAL,
1971
 
                                            emitted_vertices_vec,
1972
 
                                            uint_bld->zero);
1973
 
   mask = LLVMBuildAnd(builder, mask, emitted_mask, "");
1974
 
   bld->gs_iface->end_primitive(bld->gs_iface, &bld->bld_base.base,
1975
 
                                total_emitted_vertices_vec,
1976
 
                                emitted_vertices_vec, emitted_prims_vec, mask, stream_id);
1977
 
   increment_vec_ptr_by_mask(bld_base, bld->emitted_prims_vec_ptr[stream_id],
1978
 
                             mask);
1979
 
   clear_uint_vec_ptr_from_mask(bld_base, bld->emitted_vertices_vec_ptr[stream_id],
1980
 
                                mask);
1981
 
}
1982
 
 
1983
 
static void end_primitive(struct lp_build_nir_context *bld_base, uint32_t stream_id)
1984
 
{
1985
 
   ASSERTED struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1986
 
 
1987
 
   assert(bld->gs_iface->end_primitive);
1988
 
 
1989
 
   LLVMValueRef mask = mask_vec(bld_base);
1990
 
   end_primitive_masked(bld_base, mask, stream_id);
1991
 
}
1992
 
 
1993
 
static void
1994
 
emit_prologue(struct lp_build_nir_soa_context *bld)
1995
 
{
1996
 
   struct gallivm_state * gallivm = bld->bld_base.base.gallivm;
1997
 
   if (bld->indirects & nir_var_shader_in && !bld->gs_iface && !bld->tcs_iface && !bld->tes_iface) {
1998
 
      uint32_t num_inputs = util_bitcount64(bld->bld_base.shader->info.inputs_read);
1999
 
      unsigned index, chan;
2000
 
      LLVMTypeRef vec_type = bld->bld_base.base.vec_type;
2001
 
      LLVMValueRef array_size = lp_build_const_int32(gallivm, num_inputs * 4);
2002
 
      bld->inputs_array = lp_build_array_alloca(gallivm,
2003
 
                                               vec_type, array_size,
2004
 
                                               "input_array");
2005
 
 
2006
 
      for (index = 0; index < num_inputs; ++index) {
2007
 
         for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
2008
 
            LLVMValueRef lindex =
2009
 
               lp_build_const_int32(gallivm, index * 4 + chan);
2010
 
            LLVMValueRef input_ptr =
2011
 
               LLVMBuildGEP(gallivm->builder, bld->inputs_array,
2012
 
                            &lindex, 1, "");
2013
 
            LLVMValueRef value = bld->inputs[index][chan];
2014
 
            if (value)
2015
 
               LLVMBuildStore(gallivm->builder, value, input_ptr);
2016
 
         }
2017
 
      }
2018
 
   }
2019
 
}
2020
 
 
2021
 
static void emit_vote(struct lp_build_nir_context *bld_base, LLVMValueRef src,
2022
 
                      nir_intrinsic_instr *instr, LLVMValueRef result[4])
2023
 
{
2024
 
   struct gallivm_state * gallivm = bld_base->base.gallivm;
2025
 
   LLVMBuilderRef builder = gallivm->builder;
2026
 
   uint32_t bit_size = nir_src_bit_size(instr->src[0]);
2027
 
   LLVMValueRef exec_mask = mask_vec(bld_base);
2028
 
   struct lp_build_loop_state loop_state;
2029
 
   LLVMValueRef outer_cond = LLVMBuildICmp(builder, LLVMIntNE, exec_mask, bld_base->uint_bld.zero, "");
2030
 
 
2031
 
   LLVMValueRef res_store = lp_build_alloca(gallivm, bld_base->uint_bld.elem_type, "");
2032
 
   LLVMValueRef eq_store = lp_build_alloca(gallivm, get_int_bld(bld_base, true, bit_size)->elem_type, "");
2033
 
   LLVMValueRef init_val = NULL;
2034
 
   if (instr->intrinsic == nir_intrinsic_vote_ieq ||
2035
 
       instr->intrinsic == nir_intrinsic_vote_feq) {
2036
 
      /* for equal we unfortunately have to loop and find the first valid one. */
2037
 
      lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
2038
 
      LLVMValueRef if_cond = LLVMBuildExtractElement(gallivm->builder, outer_cond, loop_state.counter, "");
2039
 
 
2040
 
      struct lp_build_if_state ifthen;
2041
 
      lp_build_if(&ifthen, gallivm, if_cond);
2042
 
      LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, src,
2043
 
                                                       loop_state.counter, "");
2044
 
      LLVMBuildStore(builder, value_ptr, eq_store);
2045
 
      LLVMBuildStore(builder, lp_build_const_int32(gallivm, -1), res_store);
2046
 
      lp_build_endif(&ifthen);
2047
 
      lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, bld_base->uint_bld.type.length),
2048
 
                             NULL, LLVMIntUGE);
2049
 
      init_val = LLVMBuildLoad(builder, eq_store, "");
2050
 
   } else {
2051
 
      LLVMBuildStore(builder, lp_build_const_int32(gallivm, instr->intrinsic == nir_intrinsic_vote_any ? 0 : -1), res_store);
2052
 
   }
2053
 
 
2054
 
   LLVMValueRef res;
2055
 
   lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
2056
 
   LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, src,
2057
 
                                                       loop_state.counter, "");
2058
 
   struct lp_build_if_state ifthen;
2059
 
   LLVMValueRef if_cond;
2060
 
   if_cond = LLVMBuildExtractElement(gallivm->builder, outer_cond, loop_state.counter, "");
2061
 
 
2062
 
   lp_build_if(&ifthen, gallivm, if_cond);
2063
 
   res = LLVMBuildLoad(builder, res_store, "");
2064
 
 
2065
 
   if (instr->intrinsic == nir_intrinsic_vote_feq) {
2066
 
      struct lp_build_context *flt_bld = get_flt_bld(bld_base, bit_size);
2067
 
      LLVMValueRef tmp = LLVMBuildFCmp(builder, LLVMRealUEQ,
2068
 
                                       LLVMBuildBitCast(builder, init_val, flt_bld->elem_type, ""),
2069
 
                                       LLVMBuildBitCast(builder, value_ptr, flt_bld->elem_type, ""), "");
2070
 
      tmp = LLVMBuildSExt(builder, tmp, bld_base->uint_bld.elem_type, "");
2071
 
      res = LLVMBuildAnd(builder, res, tmp, "");
2072
 
   } else if (instr->intrinsic == nir_intrinsic_vote_ieq) {
2073
 
      LLVMValueRef tmp = LLVMBuildICmp(builder, LLVMIntEQ, init_val, value_ptr, "");
2074
 
      tmp = LLVMBuildSExt(builder, tmp, bld_base->uint_bld.elem_type, "");
2075
 
      res = LLVMBuildAnd(builder, res, tmp, "");
2076
 
   } else if (instr->intrinsic == nir_intrinsic_vote_any)
2077
 
      res = LLVMBuildOr(builder, res, value_ptr, "");
2078
 
   else
2079
 
      res = LLVMBuildAnd(builder, res, value_ptr, "");
2080
 
   LLVMBuildStore(builder, res, res_store);
2081
 
   lp_build_endif(&ifthen);
2082
 
   lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, bld_base->uint_bld.type.length),
2083
 
                          NULL, LLVMIntUGE);
2084
 
   result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, LLVMBuildLoad(builder, res_store, ""));
2085
 
}
2086
 
 
2087
 
static void emit_ballot(struct lp_build_nir_context *bld_base, LLVMValueRef src, nir_intrinsic_instr *instr, LLVMValueRef result[4])
2088
 
{
2089
 
   struct gallivm_state * gallivm = bld_base->base.gallivm;
2090
 
   LLVMBuilderRef builder = gallivm->builder;
2091
 
   LLVMValueRef exec_mask = mask_vec(bld_base);
2092
 
   struct lp_build_loop_state loop_state;
2093
 
   src = LLVMBuildAnd(builder, src, exec_mask, "");
2094
 
   LLVMValueRef res_store = lp_build_alloca(gallivm, bld_base->int_bld.elem_type, "");
2095
 
   LLVMValueRef res;
2096
 
   lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
2097
 
   LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, src,
2098
 
                                                    loop_state.counter, "");
2099
 
   res = LLVMBuildLoad(builder, res_store, "");
2100
 
   res = LLVMBuildOr(builder,
2101
 
                     res,
2102
 
                     LLVMBuildAnd(builder, value_ptr, LLVMBuildShl(builder, lp_build_const_int32(gallivm, 1), loop_state.counter, ""), ""), "");
2103
 
   LLVMBuildStore(builder, res, res_store);
2104
 
 
2105
 
   lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, bld_base->uint_bld.type.length),
2106
 
                          NULL, LLVMIntUGE);
2107
 
   result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, LLVMBuildLoad(builder, res_store, ""));
2108
 
}
2109
 
 
2110
 
static void emit_elect(struct lp_build_nir_context *bld_base, LLVMValueRef result[4])
2111
 
{
2112
 
   struct gallivm_state *gallivm = bld_base->base.gallivm;
2113
 
   LLVMBuilderRef builder = gallivm->builder;
2114
 
   LLVMValueRef exec_mask = mask_vec(bld_base);
2115
 
   struct lp_build_loop_state loop_state;
2116
 
 
2117
 
   LLVMValueRef idx_store = lp_build_alloca(gallivm, bld_base->int_bld.elem_type, "");
2118
 
   LLVMValueRef found_store = lp_build_alloca(gallivm, bld_base->int_bld.elem_type, "");
2119
 
   lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
2120
 
   LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, exec_mask,
2121
 
                                                    loop_state.counter, "");
2122
 
   LLVMValueRef cond = LLVMBuildICmp(gallivm->builder,
2123
 
                                     LLVMIntEQ,
2124
 
                                     value_ptr,
2125
 
                                     lp_build_const_int32(gallivm, -1), "");
2126
 
   LLVMValueRef cond2 = LLVMBuildICmp(gallivm->builder,
2127
 
                                      LLVMIntEQ,
2128
 
                                      LLVMBuildLoad(builder, found_store, ""),
2129
 
                                      lp_build_const_int32(gallivm, 0), "");
2130
 
 
2131
 
   cond = LLVMBuildAnd(builder, cond, cond2, "");
2132
 
   struct lp_build_if_state ifthen;
2133
 
   lp_build_if(&ifthen, gallivm, cond);
2134
 
   LLVMBuildStore(builder, lp_build_const_int32(gallivm, 1), found_store);
2135
 
   LLVMBuildStore(builder, loop_state.counter, idx_store);
2136
 
   lp_build_endif(&ifthen);
2137
 
   lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, bld_base->uint_bld.type.length),
2138
 
                          NULL, LLVMIntUGE);
2139
 
 
2140
 
   result[0] = LLVMBuildInsertElement(builder, bld_base->uint_bld.zero,
2141
 
                                      lp_build_const_int32(gallivm, -1),
2142
 
                                      LLVMBuildLoad(builder, idx_store, ""),
2143
 
                                      "");
2144
 
}
2145
 
 
2146
 
#if LLVM_VERSION_MAJOR >= 10
2147
 
static void emit_shuffle(struct lp_build_nir_context *bld_base, LLVMValueRef src, LLVMValueRef index,
2148
 
                        nir_intrinsic_instr *instr, LLVMValueRef result[4])
2149
 
{
2150
 
   assert(instr->intrinsic == nir_intrinsic_shuffle);
2151
 
 
2152
 
   struct gallivm_state *gallivm = bld_base->base.gallivm;
2153
 
   LLVMBuilderRef builder = gallivm->builder;
2154
 
   uint32_t bit_size = nir_src_bit_size(instr->src[0]);
2155
 
   uint32_t index_bit_size = nir_src_bit_size(instr->src[1]);
2156
 
   struct lp_build_context *int_bld = get_int_bld(bld_base, true, bit_size);
2157
 
 
2158
 
   bool index_is_constant_data = LLVMIsAConstantAggregateZero(index) || LLVMIsAConstantDataSequential(index) || LLVMIsAUndefValue(index);
2159
 
 
2160
 
   if (index_is_constant_data) {
2161
 
      /* freeze `src` in case inactive invocations contain poison */
2162
 
      src = LLVMBuildFreeze(builder, src, "");
2163
 
      result[0] = LLVMBuildShuffleVector(builder, src, LLVMGetUndef(LLVMTypeOf(src)), index, "");
2164
 
   } else if (util_get_cpu_caps()->has_avx2 && bit_size == 32 && index_bit_size == 32 && int_bld->type.length == 8) {
2165
 
      /* freeze `src` in case inactive invocations contain poison */
2166
 
      src = LLVMBuildFreeze(builder, src, "");
2167
 
      result[0] = lp_build_intrinsic_binary(builder, "llvm.x86.avx2.permd", int_bld->vec_type, src, index);
2168
 
   } else {
2169
 
      LLVMValueRef res_store = lp_build_alloca(gallivm, int_bld->vec_type, "");
2170
 
      struct lp_build_loop_state loop_state;
2171
 
      lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
2172
 
 
2173
 
      LLVMValueRef index_value = LLVMBuildExtractElement(builder, index, loop_state.counter, "");
2174
 
 
2175
 
      LLVMValueRef src_value = LLVMBuildExtractElement(builder, src, index_value, "");
2176
 
      /* freeze `src_value` in case an out-of-bounds index or an index into an
2177
 
       * inactive invocation results in poison
2178
 
       */
2179
 
      src_value = LLVMBuildFreeze(builder, src_value, "");
2180
 
 
2181
 
      LLVMValueRef res = LLVMBuildLoad(builder, res_store, "");
2182
 
      res = LLVMBuildInsertElement(builder, res, src_value, loop_state.counter, "");
2183
 
      LLVMBuildStore(builder, res, res_store);
2184
 
 
2185
 
      lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, bld_base->uint_bld.type.length),
2186
 
                             NULL, LLVMIntUGE);
2187
 
 
2188
 
      result[0] = LLVMBuildLoad(builder, res_store, "");
2189
 
   }
2190
 
}
2191
 
#endif
2192
 
 
2193
 
static void emit_reduce(struct lp_build_nir_context *bld_base, LLVMValueRef src,
2194
 
                        nir_intrinsic_instr *instr, LLVMValueRef result[4])
2195
 
{
2196
 
   struct gallivm_state *gallivm = bld_base->base.gallivm;
2197
 
   LLVMBuilderRef builder = gallivm->builder;
2198
 
   uint32_t bit_size = nir_src_bit_size(instr->src[0]);
2199
 
   /* can't use llvm reduction intrinsics because of exec_mask */
2200
 
   LLVMValueRef exec_mask = mask_vec(bld_base);
2201
 
   struct lp_build_loop_state loop_state;
2202
 
   nir_op reduction_op = nir_intrinsic_reduction_op(instr);
2203
 
 
2204
 
   LLVMValueRef res_store = NULL;
2205
 
   LLVMValueRef scan_store;
2206
 
   struct lp_build_context *int_bld = get_int_bld(bld_base, true, bit_size);
2207
 
 
2208
 
   if (instr->intrinsic != nir_intrinsic_reduce)
2209
 
      res_store = lp_build_alloca(gallivm, int_bld->vec_type, "");
2210
 
 
2211
 
   scan_store = lp_build_alloca(gallivm, int_bld->elem_type, "");
2212
 
 
2213
 
   struct lp_build_context elem_bld;
2214
 
   bool is_flt = reduction_op == nir_op_fadd ||
2215
 
      reduction_op == nir_op_fmul ||
2216
 
      reduction_op == nir_op_fmin ||
2217
 
      reduction_op == nir_op_fmax;
2218
 
   bool is_unsigned = reduction_op == nir_op_umin ||
2219
 
      reduction_op == nir_op_umax;
2220
 
 
2221
 
   struct lp_build_context *vec_bld = is_flt ? get_flt_bld(bld_base, bit_size) :
2222
 
      get_int_bld(bld_base, is_unsigned, bit_size);
2223
 
 
2224
 
   lp_build_context_init(&elem_bld, gallivm, lp_elem_type(vec_bld->type));
2225
 
 
2226
 
   LLVMValueRef store_val = NULL;
2227
 
   /*
2228
 
    * Put the identity value for the operation into the storage
2229
 
    */
2230
 
   switch (reduction_op) {
2231
 
   case nir_op_fmin: {
2232
 
      LLVMValueRef flt_max = bit_size == 64 ? LLVMConstReal(LLVMDoubleTypeInContext(gallivm->context), INFINITY) :
2233
 
         (bit_size == 16 ? LLVMConstReal(LLVMHalfTypeInContext(gallivm->context), INFINITY) : lp_build_const_float(gallivm, INFINITY));
2234
 
      store_val = LLVMBuildBitCast(builder, flt_max, int_bld->elem_type, "");
2235
 
      break;
2236
 
   }
2237
 
   case nir_op_fmax: {
2238
 
      LLVMValueRef flt_min = bit_size == 64 ? LLVMConstReal(LLVMDoubleTypeInContext(gallivm->context), -INFINITY) :
2239
 
         (bit_size == 16 ? LLVMConstReal(LLVMHalfTypeInContext(gallivm->context), -INFINITY) : lp_build_const_float(gallivm, -INFINITY));
2240
 
      store_val = LLVMBuildBitCast(builder, flt_min, int_bld->elem_type, "");
2241
 
      break;
2242
 
   }
2243
 
   case nir_op_fmul: {
2244
 
      LLVMValueRef flt_one = bit_size == 64 ? LLVMConstReal(LLVMDoubleTypeInContext(gallivm->context), 1.0) :
2245
 
         (bit_size == 16 ? LLVMConstReal(LLVMHalfTypeInContext(gallivm->context), 1.0) : lp_build_const_float(gallivm, 1.0));
2246
 
      store_val = LLVMBuildBitCast(builder, flt_one, int_bld->elem_type, "");
2247
 
      break;
2248
 
   }
2249
 
   case nir_op_umin:
2250
 
      switch (bit_size) {
2251
 
      case 8:
2252
 
         store_val = LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), UINT8_MAX, 0);
2253
 
         break;
2254
 
      case 16:
2255
 
         store_val = LLVMConstInt(LLVMInt16TypeInContext(gallivm->context), UINT16_MAX, 0);
2256
 
         break;
2257
 
      case 32:
2258
 
      default:
2259
 
         store_val  = lp_build_const_int32(gallivm, UINT_MAX);
2260
 
         break;
2261
 
      case 64:
2262
 
         store_val  = lp_build_const_int64(gallivm, UINT64_MAX);
2263
 
         break;
2264
 
      }
2265
 
      break;
2266
 
   case nir_op_imin:
2267
 
      switch (bit_size) {
2268
 
      case 8:
2269
 
         store_val = LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), INT8_MAX, 0);
2270
 
         break;
2271
 
      case 16:
2272
 
         store_val = LLVMConstInt(LLVMInt16TypeInContext(gallivm->context), INT16_MAX, 0);
2273
 
         break;
2274
 
      case 32:
2275
 
      default:
2276
 
         store_val  = lp_build_const_int32(gallivm, INT_MAX);
2277
 
         break;
2278
 
      case 64:
2279
 
         store_val  = lp_build_const_int64(gallivm, INT64_MAX);
2280
 
         break;
2281
 
      }
2282
 
      break;
2283
 
   case nir_op_imax:
2284
 
      switch (bit_size) {
2285
 
      case 8:
2286
 
         store_val = LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), INT8_MIN, 0);
2287
 
         break;
2288
 
      case 16:
2289
 
         store_val = LLVMConstInt(LLVMInt16TypeInContext(gallivm->context), INT16_MIN, 0);
2290
 
         break;
2291
 
      case 32:
2292
 
      default:
2293
 
         store_val  = lp_build_const_int32(gallivm, INT_MIN);
2294
 
         break;
2295
 
      case 64:
2296
 
         store_val  = lp_build_const_int64(gallivm, INT64_MIN);
2297
 
         break;
2298
 
      }
2299
 
      break;
2300
 
   case nir_op_imul:
2301
 
      switch (bit_size) {
2302
 
      case 8:
2303
 
         store_val = LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), 1, 0);
2304
 
         break;
2305
 
      case 16:
2306
 
         store_val = LLVMConstInt(LLVMInt16TypeInContext(gallivm->context), 1, 0);
2307
 
         break;
2308
 
      case 32:
2309
 
      default:
2310
 
         store_val  = lp_build_const_int32(gallivm, 1);
2311
 
         break;
2312
 
      case 64:
2313
 
         store_val  = lp_build_const_int64(gallivm, 1);
2314
 
         break;
2315
 
      }
2316
 
      break;
2317
 
   case nir_op_iand:
2318
 
      switch (bit_size) {
2319
 
      case 8:
2320
 
         store_val = LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), 0xff, 0);
2321
 
         break;
2322
 
      case 16:
2323
 
         store_val = LLVMConstInt(LLVMInt16TypeInContext(gallivm->context), 0xffff, 0);
2324
 
         break;
2325
 
      case 32:
2326
 
      default:
2327
 
         store_val  = lp_build_const_int32(gallivm, 0xffffffff);
2328
 
         break;
2329
 
      case 64:
2330
 
         store_val  = lp_build_const_int64(gallivm, 0xffffffffffffffffLL);
2331
 
         break;
2332
 
      }
2333
 
      break;
2334
 
   default:
2335
 
      break;
2336
 
   }
2337
 
   if (store_val)
2338
 
      LLVMBuildStore(builder, store_val, scan_store);
2339
 
 
2340
 
   LLVMValueRef outer_cond = LLVMBuildICmp(builder, LLVMIntNE, exec_mask, bld_base->uint_bld.zero, "");
2341
 
 
2342
 
   lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
2343
 
 
2344
 
   struct lp_build_if_state ifthen;
2345
 
   LLVMValueRef if_cond = LLVMBuildExtractElement(gallivm->builder, outer_cond, loop_state.counter, "");
2346
 
   lp_build_if(&ifthen, gallivm, if_cond);
2347
 
   LLVMValueRef value = LLVMBuildExtractElement(gallivm->builder, src, loop_state.counter, "");
2348
 
 
2349
 
   LLVMValueRef res = NULL;
2350
 
   LLVMValueRef scan_val = LLVMBuildLoad(gallivm->builder, scan_store, "");
2351
 
   if (instr->intrinsic != nir_intrinsic_reduce)
2352
 
      res = LLVMBuildLoad(gallivm->builder, res_store, "");
2353
 
 
2354
 
   if (instr->intrinsic == nir_intrinsic_exclusive_scan)
2355
 
      res = LLVMBuildInsertElement(builder, res, scan_val, loop_state.counter, "");
2356
 
 
2357
 
   if (is_flt) {
2358
 
      scan_val = LLVMBuildBitCast(builder, scan_val, elem_bld.elem_type, "");
2359
 
      value = LLVMBuildBitCast(builder, value, elem_bld.elem_type, "");
2360
 
   }
2361
 
   switch (reduction_op) {
2362
 
   case nir_op_fadd:
2363
 
   case nir_op_iadd:
2364
 
      scan_val = lp_build_add(&elem_bld, value, scan_val);
2365
 
      break;
2366
 
   case nir_op_fmul:
2367
 
   case nir_op_imul:
2368
 
      scan_val = lp_build_mul(&elem_bld, value, scan_val);
2369
 
      break;
2370
 
   case nir_op_imin:
2371
 
   case nir_op_umin:
2372
 
   case nir_op_fmin:
2373
 
      scan_val = lp_build_min(&elem_bld, value, scan_val);
2374
 
      break;
2375
 
   case nir_op_imax:
2376
 
   case nir_op_umax:
2377
 
   case nir_op_fmax:
2378
 
      scan_val = lp_build_max(&elem_bld, value, scan_val);
2379
 
      break;
2380
 
   case nir_op_iand:
2381
 
      scan_val = lp_build_and(&elem_bld, value, scan_val);
2382
 
      break;
2383
 
   case nir_op_ior:
2384
 
      scan_val = lp_build_or(&elem_bld, value, scan_val);
2385
 
      break;
2386
 
   case nir_op_ixor:
2387
 
      scan_val = lp_build_xor(&elem_bld, value, scan_val);
2388
 
      break;
2389
 
   default:
2390
 
      assert(0);
2391
 
      break;
2392
 
   }
2393
 
   if (is_flt)
2394
 
      scan_val = LLVMBuildBitCast(builder, scan_val, int_bld->elem_type, "");
2395
 
   LLVMBuildStore(builder, scan_val, scan_store);
2396
 
 
2397
 
   if (instr->intrinsic == nir_intrinsic_inclusive_scan) {
2398
 
      res = LLVMBuildInsertElement(builder, res, scan_val, loop_state.counter, "");
2399
 
   }
2400
 
 
2401
 
   if (instr->intrinsic != nir_intrinsic_reduce)
2402
 
      LLVMBuildStore(builder, res, res_store);
2403
 
   lp_build_endif(&ifthen);
2404
 
 
2405
 
   lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, bld_base->uint_bld.type.length),
2406
 
                          NULL, LLVMIntUGE);
2407
 
   if (instr->intrinsic == nir_intrinsic_reduce)
2408
 
      result[0] = lp_build_broadcast_scalar(int_bld, LLVMBuildLoad(builder, scan_store, ""));
2409
 
   else
2410
 
      result[0] = LLVMBuildLoad(builder, res_store, "");
2411
 
}
2412
 
 
2413
 
static void emit_read_invocation(struct lp_build_nir_context *bld_base,
2414
 
                                 LLVMValueRef src,
2415
 
                                 unsigned bit_size,
2416
 
                                 LLVMValueRef invoc,
2417
 
                                 LLVMValueRef result[4])
2418
 
{
2419
 
   struct gallivm_state *gallivm = bld_base->base.gallivm;
2420
 
   LLVMBuilderRef builder = gallivm->builder;
2421
 
   LLVMValueRef idx;
2422
 
   struct lp_build_context *uint_bld = get_int_bld(bld_base, true, bit_size);
2423
 
 
2424
 
   /* have to find the first active invocation */
2425
 
   LLVMValueRef exec_mask = mask_vec(bld_base);
2426
 
   struct lp_build_loop_state loop_state;
2427
 
   LLVMValueRef res_store = lp_build_alloca(gallivm, bld_base->int_bld.elem_type, "");
2428
 
   LLVMValueRef outer_cond = LLVMBuildICmp(builder, LLVMIntNE, exec_mask, bld_base->uint_bld.zero, "");
2429
 
   lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, bld_base->uint_bld.type.length));
2430
 
 
2431
 
   LLVMValueRef if_cond = LLVMBuildExtractElement(gallivm->builder, outer_cond, loop_state.counter, "");
2432
 
   struct lp_build_if_state ifthen;
2433
 
 
2434
 
   lp_build_if(&ifthen, gallivm, if_cond);
2435
 
   LLVMValueRef store_val = loop_state.counter;
2436
 
   if (invoc)
2437
 
      store_val = LLVMBuildExtractElement(gallivm->builder, invoc, loop_state.counter, "");
2438
 
   LLVMBuildStore(builder, store_val, res_store);
2439
 
   lp_build_endif(&ifthen);
2440
 
 
2441
 
   lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, -1),
2442
 
                          lp_build_const_int32(gallivm, -1), LLVMIntEQ);
2443
 
   idx = LLVMBuildLoad(builder, res_store, "");
2444
 
 
2445
 
   LLVMValueRef value = LLVMBuildExtractElement(gallivm->builder,
2446
 
                                                src, idx, "");
2447
 
   result[0] = lp_build_broadcast_scalar(uint_bld, value);
2448
 
}
2449
 
 
2450
 
static void
2451
 
emit_interp_at(struct lp_build_nir_context *bld_base,
2452
 
               unsigned num_components,
2453
 
               nir_variable *var,
2454
 
               bool centroid,
2455
 
               bool sample,
2456
 
               unsigned const_index,
2457
 
               LLVMValueRef indir_index,
2458
 
               LLVMValueRef offsets[2],
2459
 
               LLVMValueRef dst[4])
2460
 
{
2461
 
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
2462
 
 
2463
 
   for (unsigned i = 0; i < num_components; i++) {
2464
 
      dst[i] = bld->fs_iface->interp_fn(bld->fs_iface, &bld_base->base,
2465
 
                                        const_index + var->data.driver_location, i + var->data.location_frac,
2466
 
                                        centroid, sample, indir_index, offsets);
2467
 
   }
2468
 
}
2469
 
 
2470
 
static LLVMValueRef get_scratch_thread_offsets(struct gallivm_state *gallivm,
2471
 
                                               struct lp_type type,
2472
 
                                               unsigned scratch_size)
2473
 
{
2474
 
   LLVMTypeRef elem_type = lp_build_int_elem_type(gallivm, type);
2475
 
   LLVMValueRef elems[LP_MAX_VECTOR_LENGTH];
2476
 
   unsigned i;
2477
 
 
2478
 
   if (type.length == 1)
2479
 
      return LLVMConstInt(elem_type, 0, 0);
2480
 
 
2481
 
   for (i = 0; i < type.length; ++i)
2482
 
      elems[i] = LLVMConstInt(elem_type, scratch_size * i, 0);
2483
 
 
2484
 
   return LLVMConstVector(elems, type.length);
2485
 
}
2486
 
 
2487
 
static void
2488
 
emit_load_scratch(struct lp_build_nir_context *bld_base,
2489
 
                  unsigned nc, unsigned bit_size,
2490
 
                  LLVMValueRef offset,
2491
 
                  LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS])
2492
 
{
2493
 
   struct gallivm_state * gallivm = bld_base->base.gallivm;
2494
 
   LLVMBuilderRef builder = gallivm->builder;
2495
 
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
2496
 
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
2497
 
   struct lp_build_context *load_bld;
2498
 
   LLVMValueRef thread_offsets = get_scratch_thread_offsets(gallivm, uint_bld->type, bld->scratch_size);;
2499
 
   uint32_t shift_val = bit_size_to_shift_size(bit_size);
2500
 
   LLVMValueRef exec_mask = mask_vec(bld_base);
2501
 
 
2502
 
   load_bld = get_int_bld(bld_base, true, bit_size);
2503
 
 
2504
 
   offset = lp_build_add(uint_bld, offset, thread_offsets);
2505
 
   offset = lp_build_shr_imm(uint_bld, offset, shift_val);
2506
 
   for (unsigned c = 0; c < nc; c++) {
2507
 
      LLVMValueRef loop_index = lp_build_add(uint_bld, offset, lp_build_const_int_vec(gallivm, uint_bld->type, c));
2508
 
 
2509
 
      LLVMValueRef result = lp_build_alloca(gallivm, load_bld->vec_type, "");
2510
 
      struct lp_build_loop_state loop_state;
2511
 
      lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
2512
 
 
2513
 
      struct lp_build_if_state ifthen;
2514
 
      LLVMValueRef cond, temp_res;
2515
 
 
2516
 
      loop_index = LLVMBuildExtractElement(gallivm->builder, loop_index,
2517
 
                                           loop_state.counter, "");
2518
 
      cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
2519
 
      cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
2520
 
 
2521
 
      lp_build_if(&ifthen, gallivm, cond);
2522
 
      LLVMValueRef scalar;
2523
 
      LLVMValueRef ptr2 = LLVMBuildBitCast(builder, bld->scratch_ptr, LLVMPointerType(load_bld->elem_type, 0), "");
2524
 
      scalar = lp_build_pointer_get(builder, ptr2, loop_index);
2525
 
 
2526
 
      temp_res = LLVMBuildLoad(builder, result, "");
2527
 
      temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, loop_state.counter, "");
2528
 
      LLVMBuildStore(builder, temp_res, result);
2529
 
      lp_build_else(&ifthen);
2530
 
      temp_res = LLVMBuildLoad(builder, result, "");
2531
 
      LLVMValueRef zero = lp_build_zero_bits(gallivm, bit_size);
2532
 
      temp_res = LLVMBuildInsertElement(builder, temp_res, zero, loop_state.counter, "");
2533
 
      LLVMBuildStore(builder, temp_res, result);
2534
 
      lp_build_endif(&ifthen);
2535
 
      lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
2536
 
                                NULL, LLVMIntUGE);
2537
 
      outval[c] = LLVMBuildLoad(gallivm->builder, result, "");
2538
 
   }
2539
 
}
2540
 
 
2541
 
static void
2542
 
emit_store_scratch(struct lp_build_nir_context *bld_base,
2543
 
                   unsigned writemask, unsigned nc,
2544
 
                   unsigned bit_size, LLVMValueRef offset,
2545
 
                   LLVMValueRef dst)
2546
 
{
2547
 
   struct gallivm_state * gallivm = bld_base->base.gallivm;
2548
 
   LLVMBuilderRef builder = gallivm->builder;
2549
 
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
2550
 
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
2551
 
   struct lp_build_context *store_bld;
2552
 
   LLVMValueRef thread_offsets = get_scratch_thread_offsets(gallivm, uint_bld->type, bld->scratch_size);;
2553
 
   uint32_t shift_val = bit_size_to_shift_size(bit_size);
2554
 
   store_bld = get_int_bld(bld_base, true, bit_size);
2555
 
 
2556
 
   LLVMValueRef exec_mask = mask_vec(bld_base);
2557
 
   offset = lp_build_add(uint_bld, offset, thread_offsets);
2558
 
   offset = lp_build_shr_imm(uint_bld, offset, shift_val);
2559
 
 
2560
 
   for (unsigned c = 0; c < nc; c++) {
2561
 
      if (!(writemask & (1u << c)))
2562
 
         continue;
2563
 
      LLVMValueRef val = (nc == 1) ? dst : LLVMBuildExtractValue(builder, dst, c, "");
2564
 
      LLVMValueRef loop_index = lp_build_add(uint_bld, offset, lp_build_const_int_vec(gallivm, uint_bld->type, c));
2565
 
 
2566
 
      struct lp_build_loop_state loop_state;
2567
 
      lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
2568
 
 
2569
 
      LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, val,
2570
 
                                                       loop_state.counter, "");
2571
 
      value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, store_bld->elem_type, "");
2572
 
 
2573
 
      struct lp_build_if_state ifthen;
2574
 
      LLVMValueRef cond;
2575
 
 
2576
 
      loop_index = LLVMBuildExtractElement(gallivm->builder, loop_index,
2577
 
                                                        loop_state.counter, "");
2578
 
 
2579
 
      cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
2580
 
      cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
2581
 
      lp_build_if(&ifthen, gallivm, cond);
2582
 
 
2583
 
      LLVMValueRef ptr2 = LLVMBuildBitCast(builder, bld->scratch_ptr, LLVMPointerType(store_bld->elem_type, 0), "");
2584
 
      lp_build_pointer_set(builder, ptr2, loop_index, value_ptr);
2585
 
 
2586
 
      lp_build_endif(&ifthen);
2587
 
      lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
2588
 
                             NULL, LLVMIntUGE);
2589
 
   }
2590
 
}
2591
 
 
2592
 
void lp_build_nir_soa(struct gallivm_state *gallivm,
2593
 
                      struct nir_shader *shader,
2594
 
                      const struct lp_build_tgsi_params *params,
2595
 
                      LLVMValueRef (*outputs)[4])
2596
 
{
2597
 
   struct lp_build_nir_soa_context bld;
2598
 
   struct lp_type type = params->type;
2599
 
   struct lp_type res_type;
2600
 
 
2601
 
   assert(type.length <= LP_MAX_VECTOR_LENGTH);
2602
 
   memset(&res_type, 0, sizeof res_type);
2603
 
   res_type.width = type.width;
2604
 
   res_type.length = type.length;
2605
 
   res_type.sign = 1;
2606
 
 
2607
 
   /* Setup build context */
2608
 
   memset(&bld, 0, sizeof bld);
2609
 
   lp_build_context_init(&bld.bld_base.base, gallivm, type);
2610
 
   lp_build_context_init(&bld.bld_base.uint_bld, gallivm, lp_uint_type(type));
2611
 
   lp_build_context_init(&bld.bld_base.int_bld, gallivm, lp_int_type(type));
2612
 
   lp_build_context_init(&bld.elem_bld, gallivm, lp_elem_type(type));
2613
 
   lp_build_context_init(&bld.uint_elem_bld, gallivm, lp_elem_type(lp_uint_type(type)));
2614
 
   {
2615
 
      struct lp_type dbl_type;
2616
 
      dbl_type = type;
2617
 
      dbl_type.width *= 2;
2618
 
      lp_build_context_init(&bld.bld_base.dbl_bld, gallivm, dbl_type);
2619
 
   }
2620
 
   {
2621
 
      struct lp_type half_type;
2622
 
      half_type = type;
2623
 
      half_type.width /= 2;
2624
 
      lp_build_context_init(&bld.bld_base.half_bld, gallivm, half_type);
2625
 
   }
2626
 
   {
2627
 
      struct lp_type uint64_type;
2628
 
      uint64_type = lp_uint_type(type);
2629
 
      uint64_type.width *= 2;
2630
 
      lp_build_context_init(&bld.bld_base.uint64_bld, gallivm, uint64_type);
2631
 
   }
2632
 
   {
2633
 
      struct lp_type int64_type;
2634
 
      int64_type = lp_int_type(type);
2635
 
      int64_type.width *= 2;
2636
 
      lp_build_context_init(&bld.bld_base.int64_bld, gallivm, int64_type);
2637
 
   }
2638
 
   {
2639
 
      struct lp_type uint16_type;
2640
 
      uint16_type = lp_uint_type(type);
2641
 
      uint16_type.width /= 2;
2642
 
      lp_build_context_init(&bld.bld_base.uint16_bld, gallivm, uint16_type);
2643
 
   }
2644
 
   {
2645
 
      struct lp_type int16_type;
2646
 
      int16_type = lp_int_type(type);
2647
 
      int16_type.width /= 2;
2648
 
      lp_build_context_init(&bld.bld_base.int16_bld, gallivm, int16_type);
2649
 
   }
2650
 
   {
2651
 
      struct lp_type uint8_type;
2652
 
      uint8_type = lp_uint_type(type);
2653
 
      uint8_type.width /= 4;
2654
 
      lp_build_context_init(&bld.bld_base.uint8_bld, gallivm, uint8_type);
2655
 
   }
2656
 
   {
2657
 
      struct lp_type int8_type;
2658
 
      int8_type = lp_int_type(type);
2659
 
      int8_type.width /= 4;
2660
 
      lp_build_context_init(&bld.bld_base.int8_bld, gallivm, int8_type);
2661
 
   }
2662
 
   bld.bld_base.load_var = emit_load_var;
2663
 
   bld.bld_base.store_var = emit_store_var;
2664
 
   bld.bld_base.load_reg = emit_load_reg;
2665
 
   bld.bld_base.store_reg = emit_store_reg;
2666
 
   bld.bld_base.emit_var_decl = emit_var_decl;
2667
 
   bld.bld_base.load_ubo = emit_load_ubo;
2668
 
   bld.bld_base.load_kernel_arg = emit_load_kernel_arg;
2669
 
   bld.bld_base.load_global = emit_load_global;
2670
 
   bld.bld_base.store_global = emit_store_global;
2671
 
   bld.bld_base.atomic_global = emit_atomic_global;
2672
 
   bld.bld_base.tex = emit_tex;
2673
 
   bld.bld_base.tex_size = emit_tex_size;
2674
 
   bld.bld_base.bgnloop = bgnloop;
2675
 
   bld.bld_base.endloop = endloop;
2676
 
   bld.bld_base.if_cond = if_cond;
2677
 
   bld.bld_base.else_stmt = else_stmt;
2678
 
   bld.bld_base.endif_stmt = endif_stmt;
2679
 
   bld.bld_base.break_stmt = break_stmt;
2680
 
   bld.bld_base.continue_stmt = continue_stmt;
2681
 
   bld.bld_base.sysval_intrin = emit_sysval_intrin;
2682
 
   bld.bld_base.discard = discard;
2683
 
   bld.bld_base.emit_vertex = emit_vertex;
2684
 
   bld.bld_base.end_primitive = end_primitive;
2685
 
   bld.bld_base.load_mem = emit_load_mem;
2686
 
   bld.bld_base.store_mem = emit_store_mem;
2687
 
   bld.bld_base.get_ssbo_size = emit_get_ssbo_size;
2688
 
   bld.bld_base.atomic_mem = emit_atomic_mem;
2689
 
   bld.bld_base.barrier = emit_barrier;
2690
 
   bld.bld_base.image_op = emit_image_op;
2691
 
   bld.bld_base.image_size = emit_image_size;
2692
 
   bld.bld_base.vote = emit_vote;
2693
 
   bld.bld_base.elect = emit_elect;
2694
 
   bld.bld_base.reduce = emit_reduce;
2695
 
   bld.bld_base.ballot = emit_ballot;
2696
 
#if LLVM_VERSION_MAJOR >= 10
2697
 
   bld.bld_base.shuffle = emit_shuffle;
2698
 
#endif
2699
 
   bld.bld_base.read_invocation = emit_read_invocation;
2700
 
   bld.bld_base.helper_invocation = emit_helper_invocation;
2701
 
   bld.bld_base.interp_at = emit_interp_at;
2702
 
   bld.bld_base.load_scratch = emit_load_scratch;
2703
 
   bld.bld_base.store_scratch = emit_store_scratch;
2704
 
   bld.bld_base.load_const = emit_load_const;
2705
 
 
2706
 
   bld.mask = params->mask;
2707
 
   bld.inputs = params->inputs;
2708
 
   bld.outputs = outputs;
2709
 
   bld.consts_ptr = params->consts_ptr;
2710
 
   bld.const_sizes_ptr = params->const_sizes_ptr;
2711
 
   bld.ssbo_ptr = params->ssbo_ptr;
2712
 
   bld.ssbo_sizes_ptr = params->ssbo_sizes_ptr;
2713
 
   bld.sampler = params->sampler;
2714
 
//   bld.bld_base.info = params->info;
2715
 
 
2716
 
   bld.context_ptr = params->context_ptr;
2717
 
   bld.thread_data_ptr = params->thread_data_ptr;
2718
 
   bld.bld_base.aniso_filter_table = params->aniso_filter_table;
2719
 
   bld.image = params->image;
2720
 
   bld.shared_ptr = params->shared_ptr;
2721
 
   bld.coro = params->coro;
2722
 
   bld.kernel_args_ptr = params->kernel_args;
2723
 
   bld.indirects = 0;
2724
 
   if (params->info->indirect_files & (1 << TGSI_FILE_INPUT))
2725
 
      bld.indirects |= nir_var_shader_in;
2726
 
 
2727
 
   bld.gs_iface = params->gs_iface;
2728
 
   bld.tcs_iface = params->tcs_iface;
2729
 
   bld.tes_iface = params->tes_iface;
2730
 
   bld.fs_iface = params->fs_iface;
2731
 
   if (bld.gs_iface) {
2732
 
      struct lp_build_context *uint_bld = &bld.bld_base.uint_bld;
2733
 
 
2734
 
      bld.gs_vertex_streams = params->gs_vertex_streams;
2735
 
      bld.max_output_vertices_vec = lp_build_const_int_vec(gallivm, bld.bld_base.int_bld.type,
2736
 
                                                           shader->info.gs.vertices_out);
2737
 
      for (int i = 0; i < params->gs_vertex_streams; i++) {
2738
 
         bld.emitted_prims_vec_ptr[i] =
2739
 
            lp_build_alloca(gallivm, uint_bld->vec_type, "emitted_prims_ptr");
2740
 
         bld.emitted_vertices_vec_ptr[i] =
2741
 
            lp_build_alloca(gallivm, uint_bld->vec_type, "emitted_vertices_ptr");
2742
 
         bld.total_emitted_vertices_vec_ptr[i] =
2743
 
            lp_build_alloca(gallivm, uint_bld->vec_type, "total_emitted_vertices_ptr");
2744
 
      }
2745
 
   }
2746
 
   lp_exec_mask_init(&bld.exec_mask, &bld.bld_base.int_bld);
2747
 
 
2748
 
   bld.system_values = *params->system_values;
2749
 
 
2750
 
   bld.bld_base.shader = shader;
2751
 
 
2752
 
   if (shader->scratch_size) {
2753
 
      bld.scratch_ptr = lp_build_array_alloca(gallivm,
2754
 
                                              LLVMInt8TypeInContext(gallivm->context),
2755
 
                                              lp_build_const_int32(gallivm, shader->scratch_size * type.length),
2756
 
                                              "scratch");
2757
 
   }
2758
 
   bld.scratch_size = shader->scratch_size;
2759
 
   emit_prologue(&bld);
2760
 
   lp_build_nir_llvm(&bld.bld_base, shader);
2761
 
 
2762
 
   if (bld.gs_iface) {
2763
 
      LLVMBuilderRef builder = bld.bld_base.base.gallivm->builder;
2764
 
      LLVMValueRef total_emitted_vertices_vec;
2765
 
      LLVMValueRef emitted_prims_vec;
2766
 
 
2767
 
      for (int i = 0; i < params->gs_vertex_streams; i++) {
2768
 
         end_primitive_masked(&bld.bld_base, lp_build_mask_value(bld.mask), i);
2769
 
 
2770
 
         total_emitted_vertices_vec =
2771
 
            LLVMBuildLoad(builder, bld.total_emitted_vertices_vec_ptr[i], "");
2772
 
 
2773
 
         emitted_prims_vec =
2774
 
            LLVMBuildLoad(builder, bld.emitted_prims_vec_ptr[i], "");
2775
 
         bld.gs_iface->gs_epilogue(bld.gs_iface,
2776
 
                                   total_emitted_vertices_vec,
2777
 
                                   emitted_prims_vec, i);
2778
 
      }
2779
 
   }
2780
 
   lp_exec_mask_fini(&bld.exec_mask);
2781
 
}