~mmach/netext73/mesa-haswell

« back to all changes in this revision

Viewing changes to src/gallium/drivers/vc4/vc4_nir_lower_blend.c

  • Committer: mmach
  • Date: 2022-09-22 19:56:13 UTC
  • Revision ID: netbit73@gmail.com-20220922195613-wtik9mmy20tmor0i
2022-09-22 21:17:09

Show diffs side-by-side

added added

removed removed

Lines of Context:
1
 
/*
2
 
 * Copyright © 2015 Broadcom
3
 
 *
4
 
 * Permission is hereby granted, free of charge, to any person obtaining a
5
 
 * copy of this software and associated documentation files (the "Software"),
6
 
 * to deal in the Software without restriction, including without limitation
7
 
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
 
 * and/or sell copies of the Software, and to permit persons to whom the
9
 
 * Software is furnished to do so, subject to the following conditions:
10
 
 *
11
 
 * The above copyright notice and this permission notice (including the next
12
 
 * paragraph) shall be included in all copies or substantial portions of the
13
 
 * Software.
14
 
 *
15
 
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
 
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
 
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18
 
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
 
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
 
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21
 
 * IN THE SOFTWARE.
22
 
 */
23
 
 
24
 
/**
25
 
 * Implements most of the fixed function fragment pipeline in shader code.
26
 
 *
27
 
 * VC4 doesn't have any hardware support for blending, alpha test, logic ops,
28
 
 * or color mask.  Instead, you read the current contents of the destination
29
 
 * from the tile buffer after having waited for the scoreboard (which is
30
 
 * handled by vc4_qpu_emit.c), then do math using your output color and that
31
 
 * destination value, and update the output color appropriately.
32
 
 *
33
 
 * Once this pass is done, the color write will either have one component (for
34
 
 * single sample) with packed argb8888, or 4 components with the per-sample
35
 
 * argb8888 result.
36
 
 */
37
 
 
38
 
/**
39
 
 * Lowers fixed-function blending to a load of the destination color and a
40
 
 * series of ALU operations before the store of the output.
41
 
 */
42
 
#include "util/format/u_format.h"
43
 
#include "vc4_qir.h"
44
 
#include "compiler/nir/nir_builder.h"
45
 
#include "compiler/nir/nir_format_convert.h"
46
 
#include "vc4_context.h"
47
 
 
48
 
static bool
49
 
blend_depends_on_dst_color(struct vc4_compile *c)
50
 
{
51
 
        return (c->fs_key->blend.blend_enable ||
52
 
                c->fs_key->blend.colormask != 0xf ||
53
 
                c->fs_key->logicop_func != PIPE_LOGICOP_COPY);
54
 
}
55
 
 
56
 
/** Emits a load of the previous fragment color from the tile buffer. */
57
 
static nir_ssa_def *
58
 
vc4_nir_get_dst_color(nir_builder *b, int sample)
59
 
{
60
 
        return nir_load_input(b, 1, 32, nir_imm_int(b, 0),
61
 
                              .base = VC4_NIR_TLB_COLOR_READ_INPUT + sample);
62
 
}
63
 
 
64
 
static nir_ssa_def *
65
 
vc4_blend_channel_f(nir_builder *b,
66
 
                    nir_ssa_def **src,
67
 
                    nir_ssa_def **dst,
68
 
                    unsigned factor,
69
 
                    int channel)
70
 
{
71
 
        switch(factor) {
72
 
        case PIPE_BLENDFACTOR_ONE:
73
 
                return nir_imm_float(b, 1.0);
74
 
        case PIPE_BLENDFACTOR_SRC_COLOR:
75
 
                return src[channel];
76
 
        case PIPE_BLENDFACTOR_SRC_ALPHA:
77
 
                return src[3];
78
 
        case PIPE_BLENDFACTOR_DST_ALPHA:
79
 
                return dst[3];
80
 
        case PIPE_BLENDFACTOR_DST_COLOR:
81
 
                return dst[channel];
82
 
        case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
83
 
                if (channel != 3) {
84
 
                        return nir_fmin(b,
85
 
                                        src[3],
86
 
                                        nir_fsub(b,
87
 
                                                 nir_imm_float(b, 1.0),
88
 
                                                 dst[3]));
89
 
                } else {
90
 
                        return nir_imm_float(b, 1.0);
91
 
                }
92
 
        case PIPE_BLENDFACTOR_CONST_COLOR:
93
 
                return nir_load_system_value(b,
94
 
                                             nir_intrinsic_load_blend_const_color_r_float +
95
 
                                             channel,
96
 
                                             0, 1, 32);
97
 
        case PIPE_BLENDFACTOR_CONST_ALPHA:
98
 
                return nir_load_blend_const_color_a_float(b);
99
 
        case PIPE_BLENDFACTOR_ZERO:
100
 
                return nir_imm_float(b, 0.0);
101
 
        case PIPE_BLENDFACTOR_INV_SRC_COLOR:
102
 
                return nir_fsub(b, nir_imm_float(b, 1.0), src[channel]);
103
 
        case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
104
 
                return nir_fsub(b, nir_imm_float(b, 1.0), src[3]);
105
 
        case PIPE_BLENDFACTOR_INV_DST_ALPHA:
106
 
                return nir_fsub(b, nir_imm_float(b, 1.0), dst[3]);
107
 
        case PIPE_BLENDFACTOR_INV_DST_COLOR:
108
 
                return nir_fsub(b, nir_imm_float(b, 1.0), dst[channel]);
109
 
        case PIPE_BLENDFACTOR_INV_CONST_COLOR:
110
 
                return nir_fsub(b, nir_imm_float(b, 1.0),
111
 
                                nir_load_system_value(b,
112
 
                                                      nir_intrinsic_load_blend_const_color_r_float +
113
 
                                                      channel,
114
 
                                                      0, 1, 32));
115
 
        case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
116
 
                return nir_fsub(b, nir_imm_float(b, 1.0),
117
 
                                nir_load_blend_const_color_a_float(b));
118
 
 
119
 
        default:
120
 
        case PIPE_BLENDFACTOR_SRC1_COLOR:
121
 
        case PIPE_BLENDFACTOR_SRC1_ALPHA:
122
 
        case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
123
 
        case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
124
 
                /* Unsupported. */
125
 
                fprintf(stderr, "Unknown blend factor %d\n", factor);
126
 
                return nir_imm_float(b, 1.0);
127
 
        }
128
 
}
129
 
 
130
 
static nir_ssa_def *
131
 
vc4_nir_set_packed_chan(nir_builder *b, nir_ssa_def *src0, nir_ssa_def *src1,
132
 
                        int chan)
133
 
{
134
 
        unsigned chan_mask = 0xff << (chan * 8);
135
 
        return nir_ior(b,
136
 
                       nir_iand(b, src0, nir_imm_int(b, ~chan_mask)),
137
 
                       nir_iand(b, src1, nir_imm_int(b, chan_mask)));
138
 
}
139
 
 
140
 
static nir_ssa_def *
141
 
vc4_blend_channel_i(nir_builder *b,
142
 
                    nir_ssa_def *src,
143
 
                    nir_ssa_def *dst,
144
 
                    nir_ssa_def *src_a,
145
 
                    nir_ssa_def *dst_a,
146
 
                    unsigned factor,
147
 
                    int a_chan)
148
 
{
149
 
        switch (factor) {
150
 
        case PIPE_BLENDFACTOR_ONE:
151
 
                return nir_imm_int(b, ~0);
152
 
        case PIPE_BLENDFACTOR_SRC_COLOR:
153
 
                return src;
154
 
        case PIPE_BLENDFACTOR_SRC_ALPHA:
155
 
                return src_a;
156
 
        case PIPE_BLENDFACTOR_DST_ALPHA:
157
 
                return dst_a;
158
 
        case PIPE_BLENDFACTOR_DST_COLOR:
159
 
                return dst;
160
 
        case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
161
 
                return vc4_nir_set_packed_chan(b,
162
 
                                               nir_umin_4x8_vc4(b,
163
 
                                                            src_a,
164
 
                                                            nir_inot(b, dst_a)),
165
 
                                               nir_imm_int(b, ~0),
166
 
                                               a_chan);
167
 
        case PIPE_BLENDFACTOR_CONST_COLOR:
168
 
                return nir_load_blend_const_color_rgba8888_unorm(b);
169
 
        case PIPE_BLENDFACTOR_CONST_ALPHA:
170
 
                return nir_load_blend_const_color_aaaa8888_unorm(b);
171
 
        case PIPE_BLENDFACTOR_ZERO:
172
 
                return nir_imm_int(b, 0);
173
 
        case PIPE_BLENDFACTOR_INV_SRC_COLOR:
174
 
                return nir_inot(b, src);
175
 
        case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
176
 
                return nir_inot(b, src_a);
177
 
        case PIPE_BLENDFACTOR_INV_DST_ALPHA:
178
 
                return nir_inot(b, dst_a);
179
 
        case PIPE_BLENDFACTOR_INV_DST_COLOR:
180
 
                return nir_inot(b, dst);
181
 
        case PIPE_BLENDFACTOR_INV_CONST_COLOR:
182
 
                return nir_inot(b,
183
 
                                nir_load_blend_const_color_rgba8888_unorm(b));
184
 
        case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
185
 
                return nir_inot(b,
186
 
                                nir_load_blend_const_color_aaaa8888_unorm(b));
187
 
 
188
 
        default:
189
 
        case PIPE_BLENDFACTOR_SRC1_COLOR:
190
 
        case PIPE_BLENDFACTOR_SRC1_ALPHA:
191
 
        case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
192
 
        case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
193
 
                /* Unsupported. */
194
 
                fprintf(stderr, "Unknown blend factor %d\n", factor);
195
 
                return nir_imm_int(b, ~0);
196
 
        }
197
 
}
198
 
 
199
 
static nir_ssa_def *
200
 
vc4_blend_func_f(nir_builder *b, nir_ssa_def *src, nir_ssa_def *dst,
201
 
                 unsigned func)
202
 
{
203
 
        switch (func) {
204
 
        case PIPE_BLEND_ADD:
205
 
                return nir_fadd(b, src, dst);
206
 
        case PIPE_BLEND_SUBTRACT:
207
 
                return nir_fsub(b, src, dst);
208
 
        case PIPE_BLEND_REVERSE_SUBTRACT:
209
 
                return nir_fsub(b, dst, src);
210
 
        case PIPE_BLEND_MIN:
211
 
                return nir_fmin(b, src, dst);
212
 
        case PIPE_BLEND_MAX:
213
 
                return nir_fmax(b, src, dst);
214
 
 
215
 
        default:
216
 
                /* Unsupported. */
217
 
                fprintf(stderr, "Unknown blend func %d\n", func);
218
 
                return src;
219
 
 
220
 
        }
221
 
}
222
 
 
223
 
static nir_ssa_def *
224
 
vc4_blend_func_i(nir_builder *b, nir_ssa_def *src, nir_ssa_def *dst,
225
 
                 unsigned func)
226
 
{
227
 
        switch (func) {
228
 
        case PIPE_BLEND_ADD:
229
 
                return nir_usadd_4x8_vc4(b, src, dst);
230
 
        case PIPE_BLEND_SUBTRACT:
231
 
                return nir_ussub_4x8_vc4(b, src, dst);
232
 
        case PIPE_BLEND_REVERSE_SUBTRACT:
233
 
                return nir_ussub_4x8_vc4(b, dst, src);
234
 
        case PIPE_BLEND_MIN:
235
 
                return nir_umin_4x8_vc4(b, src, dst);
236
 
        case PIPE_BLEND_MAX:
237
 
                return nir_umax_4x8_vc4(b, src, dst);
238
 
 
239
 
        default:
240
 
                /* Unsupported. */
241
 
                fprintf(stderr, "Unknown blend func %d\n", func);
242
 
                return src;
243
 
 
244
 
        }
245
 
}
246
 
 
247
 
static void
248
 
vc4_do_blending_f(struct vc4_compile *c, nir_builder *b, nir_ssa_def **result,
249
 
                  nir_ssa_def **src_color, nir_ssa_def **dst_color)
250
 
{
251
 
        struct pipe_rt_blend_state *blend = &c->fs_key->blend;
252
 
 
253
 
        if (!blend->blend_enable) {
254
 
                for (int i = 0; i < 4; i++)
255
 
                        result[i] = src_color[i];
256
 
                return;
257
 
        }
258
 
 
259
 
        /* Clamp the src color to [0, 1].  Dest is already clamped. */
260
 
        for (int i = 0; i < 4; i++)
261
 
                src_color[i] = nir_fsat(b, src_color[i]);
262
 
 
263
 
        nir_ssa_def *src_blend[4], *dst_blend[4];
264
 
        for (int i = 0; i < 4; i++) {
265
 
                int src_factor = ((i != 3) ? blend->rgb_src_factor :
266
 
                                  blend->alpha_src_factor);
267
 
                int dst_factor = ((i != 3) ? blend->rgb_dst_factor :
268
 
                                  blend->alpha_dst_factor);
269
 
                src_blend[i] = nir_fmul(b, src_color[i],
270
 
                                        vc4_blend_channel_f(b,
271
 
                                                            src_color, dst_color,
272
 
                                                            src_factor, i));
273
 
                dst_blend[i] = nir_fmul(b, dst_color[i],
274
 
                                        vc4_blend_channel_f(b,
275
 
                                                            src_color, dst_color,
276
 
                                                            dst_factor, i));
277
 
        }
278
 
 
279
 
        for (int i = 0; i < 4; i++) {
280
 
                result[i] = vc4_blend_func_f(b, src_blend[i], dst_blend[i],
281
 
                                             ((i != 3) ? blend->rgb_func :
282
 
                                              blend->alpha_func));
283
 
        }
284
 
}
285
 
 
286
 
static nir_ssa_def *
287
 
vc4_nir_splat(nir_builder *b, nir_ssa_def *src)
288
 
{
289
 
        nir_ssa_def *or1 = nir_ior(b, src, nir_ishl(b, src, nir_imm_int(b, 8)));
290
 
        return nir_ior(b, or1, nir_ishl(b, or1, nir_imm_int(b, 16)));
291
 
}
292
 
 
293
 
static nir_ssa_def *
294
 
vc4_do_blending_i(struct vc4_compile *c, nir_builder *b,
295
 
                  nir_ssa_def *src_color, nir_ssa_def *dst_color,
296
 
                  nir_ssa_def *src_float_a)
297
 
{
298
 
        struct pipe_rt_blend_state *blend = &c->fs_key->blend;
299
 
 
300
 
        if (!blend->blend_enable)
301
 
                return src_color;
302
 
 
303
 
        enum pipe_format color_format = c->fs_key->color_format;
304
 
        const uint8_t *format_swiz = vc4_get_format_swizzle(color_format);
305
 
        nir_ssa_def *imm_0xff = nir_imm_int(b, 0xff);
306
 
        nir_ssa_def *src_a = nir_pack_unorm_4x8(b, src_float_a);
307
 
        nir_ssa_def *dst_a;
308
 
        int alpha_chan;
309
 
        for (alpha_chan = 0; alpha_chan < 4; alpha_chan++) {
310
 
                if (format_swiz[alpha_chan] == 3)
311
 
                        break;
312
 
        }
313
 
        if (alpha_chan != 4) {
314
 
                nir_ssa_def *shift = nir_imm_int(b, alpha_chan * 8);
315
 
                dst_a = vc4_nir_splat(b, nir_iand(b, nir_ushr(b, dst_color,
316
 
                                                              shift), imm_0xff));
317
 
        } else {
318
 
                dst_a = nir_imm_int(b, ~0);
319
 
        }
320
 
 
321
 
        nir_ssa_def *src_factor = vc4_blend_channel_i(b,
322
 
                                                      src_color, dst_color,
323
 
                                                      src_a, dst_a,
324
 
                                                      blend->rgb_src_factor,
325
 
                                                      alpha_chan);
326
 
        nir_ssa_def *dst_factor = vc4_blend_channel_i(b,
327
 
                                                      src_color, dst_color,
328
 
                                                      src_a, dst_a,
329
 
                                                      blend->rgb_dst_factor,
330
 
                                                      alpha_chan);
331
 
 
332
 
        if (alpha_chan != 4 &&
333
 
            blend->alpha_src_factor != blend->rgb_src_factor) {
334
 
                nir_ssa_def *src_alpha_factor =
335
 
                        vc4_blend_channel_i(b,
336
 
                                            src_color, dst_color,
337
 
                                            src_a, dst_a,
338
 
                                            blend->alpha_src_factor,
339
 
                                            alpha_chan);
340
 
                src_factor = vc4_nir_set_packed_chan(b, src_factor,
341
 
                                                     src_alpha_factor,
342
 
                                                     alpha_chan);
343
 
        }
344
 
        if (alpha_chan != 4 &&
345
 
            blend->alpha_dst_factor != blend->rgb_dst_factor) {
346
 
                nir_ssa_def *dst_alpha_factor =
347
 
                        vc4_blend_channel_i(b,
348
 
                                            src_color, dst_color,
349
 
                                            src_a, dst_a,
350
 
                                            blend->alpha_dst_factor,
351
 
                                            alpha_chan);
352
 
                dst_factor = vc4_nir_set_packed_chan(b, dst_factor,
353
 
                                                     dst_alpha_factor,
354
 
                                                     alpha_chan);
355
 
        }
356
 
        nir_ssa_def *src_blend = nir_umul_unorm_4x8_vc4(b, src_color, src_factor);
357
 
        nir_ssa_def *dst_blend = nir_umul_unorm_4x8_vc4(b, dst_color, dst_factor);
358
 
 
359
 
        nir_ssa_def *result =
360
 
                vc4_blend_func_i(b, src_blend, dst_blend, blend->rgb_func);
361
 
        if (alpha_chan != 4 && blend->alpha_func != blend->rgb_func) {
362
 
                nir_ssa_def *result_a = vc4_blend_func_i(b,
363
 
                                                         src_blend,
364
 
                                                         dst_blend,
365
 
                                                         blend->alpha_func);
366
 
                result = vc4_nir_set_packed_chan(b, result, result_a,
367
 
                                                 alpha_chan);
368
 
        }
369
 
        return result;
370
 
}
371
 
 
372
 
static nir_ssa_def *
373
 
vc4_logicop(nir_builder *b, int logicop_func,
374
 
            nir_ssa_def *src, nir_ssa_def *dst)
375
 
{
376
 
        switch (logicop_func) {
377
 
        case PIPE_LOGICOP_CLEAR:
378
 
                return nir_imm_int(b, 0);
379
 
        case PIPE_LOGICOP_NOR:
380
 
                return nir_inot(b, nir_ior(b, src, dst));
381
 
        case PIPE_LOGICOP_AND_INVERTED:
382
 
                return nir_iand(b, nir_inot(b, src), dst);
383
 
        case PIPE_LOGICOP_COPY_INVERTED:
384
 
                return nir_inot(b, src);
385
 
        case PIPE_LOGICOP_AND_REVERSE:
386
 
                return nir_iand(b, src, nir_inot(b, dst));
387
 
        case PIPE_LOGICOP_INVERT:
388
 
                return nir_inot(b, dst);
389
 
        case PIPE_LOGICOP_XOR:
390
 
                return nir_ixor(b, src, dst);
391
 
        case PIPE_LOGICOP_NAND:
392
 
                return nir_inot(b, nir_iand(b, src, dst));
393
 
        case PIPE_LOGICOP_AND:
394
 
                return nir_iand(b, src, dst);
395
 
        case PIPE_LOGICOP_EQUIV:
396
 
                return nir_inot(b, nir_ixor(b, src, dst));
397
 
        case PIPE_LOGICOP_NOOP:
398
 
                return dst;
399
 
        case PIPE_LOGICOP_OR_INVERTED:
400
 
                return nir_ior(b, nir_inot(b, src), dst);
401
 
        case PIPE_LOGICOP_OR_REVERSE:
402
 
                return nir_ior(b, src, nir_inot(b, dst));
403
 
        case PIPE_LOGICOP_OR:
404
 
                return nir_ior(b, src, dst);
405
 
        case PIPE_LOGICOP_SET:
406
 
                return nir_imm_int(b, ~0);
407
 
        default:
408
 
                fprintf(stderr, "Unknown logic op %d\n", logicop_func);
409
 
                FALLTHROUGH;
410
 
        case PIPE_LOGICOP_COPY:
411
 
                return src;
412
 
        }
413
 
}
414
 
 
415
 
static nir_ssa_def *
416
 
vc4_nir_swizzle_and_pack(struct vc4_compile *c, nir_builder *b,
417
 
                         nir_ssa_def **colors)
418
 
{
419
 
        enum pipe_format color_format = c->fs_key->color_format;
420
 
        const uint8_t *format_swiz = vc4_get_format_swizzle(color_format);
421
 
 
422
 
        nir_ssa_def *swizzled[4];
423
 
        for (int i = 0; i < 4; i++) {
424
 
                swizzled[i] = vc4_nir_get_swizzled_channel(b, colors,
425
 
                                                           format_swiz[i]);
426
 
        }
427
 
 
428
 
        return nir_pack_unorm_4x8(b,
429
 
                                  nir_vec4(b,
430
 
                                           swizzled[0], swizzled[1],
431
 
                                           swizzled[2], swizzled[3]));
432
 
 
433
 
}
434
 
 
435
 
static nir_ssa_def *
436
 
vc4_nir_blend_pipeline(struct vc4_compile *c, nir_builder *b, nir_ssa_def *src,
437
 
                       int sample)
438
 
{
439
 
        enum pipe_format color_format = c->fs_key->color_format;
440
 
        const uint8_t *format_swiz = vc4_get_format_swizzle(color_format);
441
 
        bool srgb = util_format_is_srgb(color_format);
442
 
 
443
 
        /* Pull out the float src/dst color components. */
444
 
        nir_ssa_def *packed_dst_color = vc4_nir_get_dst_color(b, sample);
445
 
        nir_ssa_def *dst_vec4 = nir_unpack_unorm_4x8(b, packed_dst_color);
446
 
        nir_ssa_def *src_color[4], *unpacked_dst_color[4];
447
 
        for (unsigned i = 0; i < 4; i++) {
448
 
                src_color[i] = nir_channel(b, src, i);
449
 
                unpacked_dst_color[i] = nir_channel(b, dst_vec4, i);
450
 
        }
451
 
 
452
 
        if (c->fs_key->sample_alpha_to_one && c->fs_key->msaa)
453
 
                src_color[3] = nir_imm_float(b, 1.0);
454
 
 
455
 
        nir_ssa_def *packed_color;
456
 
        if (srgb) {
457
 
                /* Unswizzle the destination color. */
458
 
                nir_ssa_def *dst_color[4];
459
 
                for (unsigned i = 0; i < 4; i++) {
460
 
                        dst_color[i] = vc4_nir_get_swizzled_channel(b,
461
 
                                                                    unpacked_dst_color,
462
 
                                                                    format_swiz[i]);
463
 
                }
464
 
 
465
 
                /* Turn dst color to linear. */
466
 
                for (int i = 0; i < 3; i++)
467
 
                        dst_color[i] = nir_format_srgb_to_linear(b, dst_color[i]);
468
 
 
469
 
                nir_ssa_def *blend_color[4];
470
 
                vc4_do_blending_f(c, b, blend_color, src_color, dst_color);
471
 
 
472
 
                /* sRGB encode the output color */
473
 
                for (int i = 0; i < 3; i++)
474
 
                        blend_color[i] = nir_format_linear_to_srgb(b, blend_color[i]);
475
 
 
476
 
                packed_color = vc4_nir_swizzle_and_pack(c, b, blend_color);
477
 
        } else {
478
 
                nir_ssa_def *packed_src_color =
479
 
                        vc4_nir_swizzle_and_pack(c, b, src_color);
480
 
 
481
 
                packed_color =
482
 
                        vc4_do_blending_i(c, b,
483
 
                                          packed_src_color, packed_dst_color,
484
 
                                          src_color[3]);
485
 
        }
486
 
 
487
 
        packed_color = vc4_logicop(b, c->fs_key->logicop_func,
488
 
                                   packed_color, packed_dst_color);
489
 
 
490
 
        /* If the bit isn't set in the color mask, then just return the
491
 
         * original dst color, instead.
492
 
         */
493
 
        uint32_t colormask = 0xffffffff;
494
 
        for (int i = 0; i < 4; i++) {
495
 
                if (format_swiz[i] < 4 &&
496
 
                    !(c->fs_key->blend.colormask & (1 << format_swiz[i]))) {
497
 
                        colormask &= ~(0xff << (i * 8));
498
 
                }
499
 
        }
500
 
 
501
 
        return nir_ior(b,
502
 
                       nir_iand(b, packed_color,
503
 
                                nir_imm_int(b, colormask)),
504
 
                       nir_iand(b, packed_dst_color,
505
 
                                nir_imm_int(b, ~colormask)));
506
 
}
507
 
 
508
 
static void
509
 
vc4_nir_store_sample_mask(struct vc4_compile *c, nir_builder *b,
510
 
                          nir_ssa_def *val)
511
 
{
512
 
        nir_variable *sample_mask = nir_variable_create(c->s, nir_var_shader_out,
513
 
                                                        glsl_uint_type(),
514
 
                                                        "sample_mask");
515
 
        sample_mask->data.driver_location = c->s->num_outputs++;
516
 
        sample_mask->data.location = FRAG_RESULT_SAMPLE_MASK;
517
 
 
518
 
        nir_store_output(b, val, nir_imm_int(b, 0),
519
 
                         .base = sample_mask->data.driver_location);
520
 
}
521
 
 
522
 
static void
523
 
vc4_nir_lower_blend_instr(struct vc4_compile *c, nir_builder *b,
524
 
                          nir_intrinsic_instr *intr)
525
 
{
526
 
        nir_ssa_def *frag_color = intr->src[0].ssa;
527
 
 
528
 
        if (c->fs_key->sample_alpha_to_coverage) {
529
 
                nir_ssa_def *a = nir_channel(b, frag_color, 3);
530
 
 
531
 
                /* XXX: We should do a nice dither based on the fragment
532
 
                 * coordinate, instead.
533
 
                 */
534
 
                nir_ssa_def *num_samples = nir_imm_float(b, VC4_MAX_SAMPLES);
535
 
                nir_ssa_def *num_bits = nir_f2i32(b, nir_fmul(b, a, num_samples));
536
 
                nir_ssa_def *bitmask = nir_isub(b,
537
 
                                                nir_ishl(b,
538
 
                                                         nir_imm_int(b, 1),
539
 
                                                         num_bits),
540
 
                                                nir_imm_int(b, 1));
541
 
                vc4_nir_store_sample_mask(c, b, bitmask);
542
 
        }
543
 
 
544
 
        /* The TLB color read returns each sample in turn, so if our blending
545
 
         * depends on the destination color, we're going to have to run the
546
 
         * blending function separately for each destination sample value, and
547
 
         * then output the per-sample color using TLB_COLOR_MS.
548
 
         */
549
 
        nir_ssa_def *blend_output;
550
 
        if (c->fs_key->msaa && blend_depends_on_dst_color(c)) {
551
 
                c->msaa_per_sample_output = true;
552
 
 
553
 
                nir_ssa_def *samples[4];
554
 
                for (int i = 0; i < VC4_MAX_SAMPLES; i++)
555
 
                        samples[i] = vc4_nir_blend_pipeline(c, b, frag_color, i);
556
 
                blend_output = nir_vec4(b,
557
 
                                        samples[0], samples[1],
558
 
                                        samples[2], samples[3]);
559
 
        } else {
560
 
                blend_output = vc4_nir_blend_pipeline(c, b, frag_color, 0);
561
 
        }
562
 
 
563
 
        nir_instr_rewrite_src(&intr->instr, &intr->src[0],
564
 
                              nir_src_for_ssa(blend_output));
565
 
        if (intr->num_components != blend_output->num_components) {
566
 
                unsigned component_mask = BITFIELD_MASK(blend_output->num_components);
567
 
                nir_intrinsic_set_write_mask(intr, component_mask);
568
 
                intr->num_components = blend_output->num_components;
569
 
        }
570
 
}
571
 
 
572
 
static bool
573
 
vc4_nir_lower_blend_block(nir_block *block, struct vc4_compile *c)
574
 
{
575
 
        nir_foreach_instr_safe(instr, block) {
576
 
                if (instr->type != nir_instr_type_intrinsic)
577
 
                        continue;
578
 
                nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
579
 
                if (intr->intrinsic != nir_intrinsic_store_output)
580
 
                        continue;
581
 
 
582
 
                nir_variable *output_var = NULL;
583
 
                nir_foreach_shader_out_variable(var, c->s) {
584
 
                        if (var->data.driver_location ==
585
 
                            nir_intrinsic_base(intr)) {
586
 
                                output_var = var;
587
 
                                break;
588
 
                        }
589
 
                }
590
 
                assert(output_var);
591
 
 
592
 
                if (output_var->data.location != FRAG_RESULT_COLOR &&
593
 
                    output_var->data.location != FRAG_RESULT_DATA0) {
594
 
                        continue;
595
 
                }
596
 
 
597
 
                nir_function_impl *impl =
598
 
                        nir_cf_node_get_function(&block->cf_node);
599
 
                nir_builder b;
600
 
                nir_builder_init(&b, impl);
601
 
                b.cursor = nir_before_instr(&intr->instr);
602
 
                vc4_nir_lower_blend_instr(c, &b, intr);
603
 
        }
604
 
        return true;
605
 
}
606
 
 
607
 
void
608
 
vc4_nir_lower_blend(nir_shader *s, struct vc4_compile *c)
609
 
{
610
 
        nir_foreach_function(function, s) {
611
 
                if (function->impl) {
612
 
                        nir_foreach_block(block, function->impl) {
613
 
                                vc4_nir_lower_blend_block(block, c);
614
 
                        }
615
 
 
616
 
                        nir_metadata_preserve(function->impl,
617
 
                                              nir_metadata_block_index |
618
 
                                              nir_metadata_dominance);
619
 
                }
620
 
        }
621
 
 
622
 
        /* If we didn't do alpha-to-coverage on the output color, we still
623
 
         * need to pass glSampleMask() through.
624
 
         */
625
 
        if (c->fs_key->sample_coverage && !c->fs_key->sample_alpha_to_coverage) {
626
 
                nir_function_impl *impl = nir_shader_get_entrypoint(s);
627
 
                nir_builder b;
628
 
                nir_builder_init(&b, impl);
629
 
                b.cursor = nir_after_block(nir_impl_last_block(impl));
630
 
 
631
 
                vc4_nir_store_sample_mask(c, &b, nir_load_sample_mask_in(&b));
632
 
        }
633
 
}