2
* Copyright © 2016 Intel Corporation
4
* Permission is hereby granted, free of charge, to any person obtaining a
5
* copy of this software and associated documentation files (the "Software"),
6
* to deal in the Software without restriction, including without limitation
7
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
* and/or sell copies of the Software, and to permit persons to whom the
9
* Software is furnished to do so, subject to the following conditions:
11
* The above copyright notice and this permission notice (including the next
12
* paragraph) shall be included in all copies or substantial portions of the
15
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21
* DEALINGS IN THE SOFTWARE.
25
#include "ir_builder.h"
26
#include "ir_optimization.h"
27
#include "ir_hierarchical_visitor.h"
28
#include "program/prog_instruction.h"
29
#include "program/prog_statevars.h"
30
#include "util/bitscan.h"
31
#include "builtin_functions.h"
32
#include "main/shader_types.h"
34
using namespace ir_builder;
36
// Shorthand for allocating a float ir_constant. Both macros expand to
// new(mem_ctx), so a variable named mem_ctx must be in scope wherever they
// are used. The second constructor argument (1 or 3) is presumably the
// number of vector components (scalar vs. vec3) -- confirm against the
// ir_constant constructor.
#define imm1(x) new(mem_ctx) ir_constant((float) (x), 1)
37
#define imm3(x) new(mem_ctx) ir_constant((float) (x), 3)
40
// Multiply blend mode: per the formula noted in the body, the blend function
// is the product of the source and destination colors (Cs*Cd).
blend_multiply(ir_variable *src, ir_variable *dst)
42
/* f(Cs,Cd) = Cs*Cd */
47
// Screen blend mode: builds the expression (src + dst) - (src * dst).
blend_screen(ir_variable *src, ir_variable *dst)
49
/* f(Cs,Cd) = Cs+Cd-Cs*Cd */
50
return sub(add(src, dst), mul(src, dst));
54
// Overlay blend mode: builds both candidate expressions,
//   rule_1 = 2 * src * dst
//   rule_2 = 1 - 2 * (1 - src) * (1 - dst)
// and selects between them with csel(), using rule_1 where dst <= 0.5.
// Same two rules as blend_hardlight(), which tests src instead of dst.
blend_overlay(ir_variable *src, ir_variable *dst)
56
// imm3() expands to new(mem_ctx), so mem_ctx has to exist in this scope.
void *mem_ctx = ralloc_parent(src);
58
/* f(Cs,Cd) = 2*Cs*Cd, if Cd <= 0.5
59
* 1-2*(1-Cs)*(1-Cd), otherwise
61
ir_rvalue *rule_1 = mul(imm3(2), mul(src, dst));
63
sub(imm3(1), mul(imm3(2), mul(sub(imm3(1), src), sub(imm3(1), dst))));
64
return csel(lequal(dst, imm3(0.5f)), rule_1, rule_2);
68
// Darken blend mode: the smaller of the source and destination colors,
// built with min2().
blend_darken(ir_variable *src, ir_variable *dst)
70
/* f(Cs,Cd) = min(Cs,Cd) */
71
return min2(src, dst);
75
// Lighten blend mode: the larger of the source and destination colors,
// built with max2().
blend_lighten(ir_variable *src, ir_variable *dst)
77
/* f(Cs,Cd) = max(Cs,Cd) */
78
return max2(src, dst);
82
// Color-dodge blend mode, expressed as two nested csel() selections:
//   dst <= 0   -> 0
//   src >= 1   -> 1
//   otherwise  -> min(1, dst / (1 - src))
// The src >= 1 case is selected ahead of the division, whose divisor
// (1 - src) would be zero at src == 1.
blend_colordodge(ir_variable *src, ir_variable *dst)
84
// imm3() expands to new(mem_ctx), so mem_ctx has to exist in this scope.
void *mem_ctx = ralloc_parent(src);
88
* min(1,Cd/(1-Cs)), if Cd > 0 and Cs < 1
89
* 1, if Cd > 0 and Cs >= 1
91
return csel(lequal(dst, imm3(0)), imm3(0),
92
csel(gequal(src, imm3(1)), imm3(1),
93
min2(imm3(1), div(dst, sub(imm3(1), src)))));
97
// Color-burn blend mode, expressed as two nested csel() selections:
//   dst >= 1   -> 1
//   src <= 0   -> 0
//   otherwise  -> 1 - min(1, (1 - dst) / src)
// The src <= 0 case is selected ahead of the division by src.
blend_colorburn(ir_variable *src, ir_variable *dst)
99
// imm3() expands to new(mem_ctx), so mem_ctx has to exist in this scope.
void *mem_ctx = ralloc_parent(src);
103
* 1 - min(1,(1-Cd)/Cs), if Cd < 1 and Cs > 0
104
* 0, if Cd < 1 and Cs <= 0
106
return csel(gequal(dst, imm3(1)), imm3(1),
107
csel(lequal(src, imm3(0)), imm3(0),
108
sub(imm3(1), min2(imm3(1), div(sub(imm3(1), dst), src)))));
112
// Hard-light blend mode: builds both candidate expressions,
//   rule_1 = 2 * src * dst
//   rule_2 = 1 - 2 * (1 - src) * (1 - dst)
// and selects between them with csel(), using rule_1 where src <= 0.5.
// Same two rules as blend_overlay(), which tests dst instead of src.
blend_hardlight(ir_variable *src, ir_variable *dst)
114
// imm3() expands to new(mem_ctx), so mem_ctx has to exist in this scope.
void *mem_ctx = ralloc_parent(src);
116
/* f(Cs,Cd) = 2*Cs*Cd, if Cs <= 0.5
117
* 1-2*(1-Cs)*(1-Cd), otherwise
119
ir_rvalue *rule_1 = mul(imm3(2), mul(src, dst));
121
sub(imm3(1), mul(imm3(2), mul(sub(imm3(1), src), sub(imm3(1), dst))));
122
return csel(lequal(src, imm3(0.5f)), rule_1, rule_2);
126
// Soft-light blend mode. Builds dst + (2 * src - 1) * factor, where factor
// is chosen with nested csel() selections:
//   src <= 0.5              -> factor_1 = dst * (1 - dst)
//   else if dst <= 0.25     -> factor_2 = dst * ((16 * dst - 12) * dst + 3)
//   otherwise               -> factor_3 = sqrt(dst) - dst
//
// NOTE(review): the explanatory text in this function writes g(Cs,Cd) for
// Cs <= 0.5 as "Cd*Cd-Cd", but factor_1 is Cd*(1-Cd) = Cd-Cd*Cd. The code
// agrees with the unsimplified rule Cd-(1-2*Cs)*Cd*(1-Cd) quoted in the same
// text, so it is the sign in that one line of text that looks inverted.
blend_softlight(ir_variable *src, ir_variable *dst)
128
// imm3() expands to new(mem_ctx), so mem_ctx has to exist in this scope.
void *mem_ctx = ralloc_parent(src);
131
* Cd-(1-2*Cs)*Cd*(1-Cd),
133
* Cd+(2*Cs-1)*Cd*((16*Cd-12)*Cd+3),
134
* if Cs > 0.5 and Cd <= 0.25
135
* Cd+(2*Cs-1)*(sqrt(Cd)-Cd),
136
* if Cs > 0.5 and Cd > 0.25
138
* We can simplify this to
140
* f(Cs,Cd) = Cd+(2*Cs-1)*g(Cs,Cd) where
141
* g(Cs,Cd) = Cd*Cd-Cd if Cs <= 0.5
142
* Cd*((16*Cd-12)*Cd+3) if Cs > 0.5 and Cd <= 0.25
143
* sqrt(Cd)-Cd, otherwise
145
ir_rvalue *factor_1 = mul(dst, sub(imm3(1), dst));
146
ir_rvalue *factor_2 =
147
mul(dst, add(mul(sub(mul(imm3(16), dst), imm3(12)), dst), imm3(3)));
148
ir_rvalue *factor_3 = sub(sqrt(dst), dst);
149
ir_rvalue *factor = csel(lequal(src, imm3(0.5f)), factor_1,
150
csel(lequal(dst, imm3(0.25f)),
151
factor_2, factor_3));
152
return add(dst, mul(sub(mul(imm3(2), src), imm3(1)), factor));
156
// Difference blend mode: absolute value of (dst - src).
blend_difference(ir_variable *src, ir_variable *dst)
158
return abs(sub(dst, src));
162
// Exclusion blend mode: builds src + (dst - 2 * src * dst).
blend_exclusion(ir_variable *src, ir_variable *dst)
164
// imm3() expands to new(mem_ctx), so mem_ctx has to exist in this scope.
void *mem_ctx = ralloc_parent(src);
166
return add(src, sub(dst, mul(imm3(2), mul(src, dst))));
169
/* Return the minimum of a vec3's components */
171
// Reduces v to a single value by chaining min2(): first over the x and y
// swizzles, then against the z swizzle.
minv3(ir_variable *v)
173
return min2(min2(swizzle_x(v), swizzle_y(v)), swizzle_z(v));
176
/* Return the maximum of a vec3's components */
178
// Reduces v to a single value by chaining max2(): first over the x and y
// swizzles, then against the z swizzle.
maxv3(ir_variable *v)
180
return max2(max2(swizzle_x(v), swizzle_y(v)), swizzle_z(v));
184
// Luminosity of the color c: the dot product of c with a constant vec3 that
// is built from `data` and allocated from c's ralloc parent.
// NOTE(review): the statements that fill `data` are not visible in this view;
// per the note in the body the weights are expected to be
// (0.30, 0.59, 0.11) -- confirm.
lumv3(ir_variable *c)
186
ir_constant_data data;
191
void *mem_ctx = ralloc_parent(c);
193
/* dot(c, vec3(0.30, 0.59, 0.11)) */
194
return dot(c, new(mem_ctx) ir_constant(glsl_type::vec3_type, &data));
198
// Saturation of the color c, computed as the spread between its largest and
// smallest components: maxv3(c) - minv3(c).
satv3(ir_variable *c)
200
return sub(maxv3(c), minv3(c));
203
/* Take the base RGB color <cbase> and override its luminosity with that
204
* of the RGB color <clum>.
206
* This follows the equations given in the ES 3.2 (June 15th, 2016)
207
* specification. Revision 16 of GL_KHR_blend_equation_advanced and
208
* revision 9 of GL_NV_blend_equation_advanced specify a different set
209
* of equations. Older revisions match ES 3.2's text, and dEQP expects
210
* the ES 3.2 rules implemented here.
213
// Emits, through the factory f, the instructions that write `color`:
//   1. color = cbase + (lumv3(clum) - lumv3(cbase)), i.e. cbase shifted by
//      the luminosity difference between clum and cbase.
//   2. Three float temporaries capture the luminosity (llum), smallest
//      component (mincol) and largest component (maxcol) of that
//      intermediate color.
//   3. If mincol < 0, color is rebuilt as llum + (color - llum) * llum /
//      (llum - mincol). Otherwise, if maxcol > 1, color is rebuilt as
//      llum + (color - llum) * <operand> / (maxcol - llum).
// NOTE(review): the remaining parameters of the signature and the
// <operand> of the second rescale are not visible in this view -- check
// them against the full file.
set_lum(ir_factory *f,
218
// imm1() expands to new(mem_ctx), so mem_ctx has to exist in this scope.
void *mem_ctx = f->mem_ctx;
219
f->emit(assign(color, add(cbase, sub(lumv3(clum), lumv3(cbase)))));
221
ir_variable *llum = f->make_temp(glsl_type::float_type, "__blend_lum");
222
ir_variable *mincol = f->make_temp(glsl_type::float_type, "__blend_mincol");
223
ir_variable *maxcol = f->make_temp(glsl_type::float_type, "__blend_maxcol");
225
f->emit(assign(llum, lumv3(color)));
226
f->emit(assign(mincol, minv3(color)));
227
f->emit(assign(maxcol, maxv3(color)));
229
f->emit(if_tree(less(mincol, imm1(0)),
230
assign(color, add(llum, div(mul(sub(color, llum), llum),
231
sub(llum, mincol)))),
232
if_tree(greater(maxcol, imm1(1)),
233
assign(color, add(llum, div(mul(sub(color, llum),
235
sub(maxcol, llum)))))));
239
/* Take the base RGB color <cbase> and override its saturation with
240
* that of the RGB color <csat>. Then override the luminosity of the
241
* result with that of the RGB color <clum>.
244
// Emits, through the factory f, the instructions that write `color`:
//   - sbase holds satv3(cbase) in a float temporary.
//   - If sbase > 0, color = (cbase - minv3(cbase)) * satv3(csat) / sbase;
//     otherwise color = 0. Testing sbase first keeps the division from
//     being emitted on the sbase == 0 path.
//   - The result is then handed to set_lum(f, color, color, clum).
// NOTE(review): the remaining parameters of the signature are not visible
// in this view.
set_lum_sat(ir_factory *f,
250
// imm1()/imm3() expand to new(mem_ctx), so mem_ctx has to exist here.
void *mem_ctx = f->mem_ctx;
252
ir_rvalue *minbase = minv3(cbase);
253
ir_rvalue *ssat = satv3(csat);
255
ir_variable *sbase = f->make_temp(glsl_type::float_type, "__blend_sbase");
256
f->emit(assign(sbase, satv3(cbase)));
258
/* Equivalent (modulo rounding errors) to setting the
259
* smallest (R,G,B) component to 0, the largest to <ssat>,
260
* and interpolating the "middle" component based on its
261
* original value relative to the smallest/largest.
263
f->emit(if_tree(greater(sbase, imm1(0)),
264
assign(color, div(mul(sub(cbase, minbase), ssat), sbase)),
265
assign(color, imm3(0))));
266
set_lum(f, color, color, clum);
270
// Builds an equality test between `mode` and an ir_constant holding q
// converted to unsigned. The constant is allocated from mode's ralloc parent.
is_mode(ir_variable *mode, enum gl_advanced_blend_mode q)
272
return equal(mode, new(ralloc_parent(mode)) ir_constant(unsigned(q)));
276
// Emits the blending computation through the factory f and accumulates the
// outcome in the vec4 temporary "__blend_result". Steps visible here:
//   - blend_src is copied into the "__blend_src" temporary so it can be read
//     several times.
//   - An ir_if on is_mode(mode, BLEND_NONE) copies src straight to result in
//     its then-branch; f.instructions is then pointed at the else-branch, so
//     the rest of the code emitted through f lands there. f is taken by
//     value, so that redirection only affects this local copy of the factory.
//   - fb and src are split into an alpha temporary and an rgb temporary.
//     When the alpha is 0 the rgb is set to 0; otherwise it comes from a
//     csel() whose last operand is rgb / alpha.
//   - Blend modes are pulled out of blend_qualifiers with u_bit_scan(). For
//     each one an ir_if on is_mode(mode, choice) is emitted via casefactory,
//     the matching blend_*() helper produces `val` (assigned to
//     "__blend_factor"), or set_lum()/set_lum_sat() write the factor
//     directly, and casefactory moves on to that ir_if's else-branch.
//   - p0 = As*Ad, p1 = As*(1-Ad), p2 = Ad*(1-As) are computed from the two
//     alphas; result gets factor*p0 + src_rgb*p1 + dst_rgb*p2, and its W
//     component gets p0 + p1 + p2.
// NOTE(review): the remaining parameters, the loop/switch scaffolding and
// several case labels are not visible in this view -- check the control flow
// against the full file.
calc_blend_result(ir_factory f,
279
ir_rvalue *blend_src,
280
GLbitfield blend_qualifiers)
282
void *mem_ctx = f.mem_ctx;
283
ir_variable *result = f.make_temp(glsl_type::vec4_type, "__blend_result");
285
/* Save blend_src to a temporary so we can reference it multiple times. */
286
ir_variable *src = f.make_temp(glsl_type::vec4_type, "__blend_src");
287
f.emit(assign(src, blend_src));
289
/* If we're not doing advanced blending, just write the original value. */
290
ir_if *if_blending = new(mem_ctx) ir_if(is_mode(mode, BLEND_NONE));
292
if_blending->then_instructions.push_tail(assign(result, src));
294
f.instructions = &if_blending->else_instructions;
297
* (0, 0, 0), if As == 0
298
* (Rs/As, Gs/As, Bs/As), otherwise
300
ir_variable *src_rgb = f.make_temp(glsl_type::vec3_type, "__blend_src_rgb");
301
ir_variable *src_alpha = f.make_temp(glsl_type::float_type, "__blend_src_a");
304
* (0, 0, 0), if Ad == 0
305
* (Rd/Ad, Gd/Ad, Bd/Ad), otherwise
307
ir_variable *dst_rgb = f.make_temp(glsl_type::vec3_type, "__blend_dst_rgb");
308
ir_variable *dst_alpha = f.make_temp(glsl_type::float_type, "__blend_dst_a");
310
f.emit(assign(dst_alpha, swizzle_w(fb)));
311
f.emit(if_tree(equal(dst_alpha, imm1(0)),
312
assign(dst_rgb, imm3(0)),
313
assign(dst_rgb, csel(equal(swizzle_xyz(fb),
314
swizzle(fb, SWIZZLE_WWWW, 3)),
316
div(swizzle_xyz(fb), dst_alpha)))));
318
f.emit(assign(src_alpha, swizzle_w(src)));
319
f.emit(if_tree(equal(src_alpha, imm1(0)),
320
assign(src_rgb, imm3(0)),
321
assign(src_rgb, csel(equal(swizzle_xyz(src),
322
swizzle(src, SWIZZLE_WWWW, 3)),
324
div(swizzle_xyz(src), src_alpha)))));
326
ir_variable *factor = f.make_temp(glsl_type::vec3_type, "__blend_factor");
328
ir_factory casefactory = f;
330
unsigned choices = blend_qualifiers;
332
enum gl_advanced_blend_mode choice = (enum gl_advanced_blend_mode)u_bit_scan(&choices);
334
ir_if *iff = new(mem_ctx) ir_if(is_mode(mode, choice));
335
casefactory.emit(iff);
336
casefactory.instructions = &iff->then_instructions;
338
ir_rvalue *val = NULL;
342
val = blend_multiply(src_rgb, dst_rgb);
345
val = blend_screen(src_rgb, dst_rgb);
348
val = blend_overlay(src_rgb, dst_rgb);
351
val = blend_darken(src_rgb, dst_rgb);
354
val = blend_lighten(src_rgb, dst_rgb);
356
case BLEND_COLORDODGE:
357
val = blend_colordodge(src_rgb, dst_rgb);
359
case BLEND_COLORBURN:
360
val = blend_colorburn(src_rgb, dst_rgb);
362
case BLEND_HARDLIGHT:
363
val = blend_hardlight(src_rgb, dst_rgb);
365
case BLEND_SOFTLIGHT:
366
val = blend_softlight(src_rgb, dst_rgb);
368
case BLEND_DIFFERENCE:
369
val = blend_difference(src_rgb, dst_rgb);
371
case BLEND_EXCLUSION:
372
val = blend_exclusion(src_rgb, dst_rgb);
375
set_lum_sat(&casefactory, factor, src_rgb, dst_rgb, dst_rgb);
377
case BLEND_HSL_SATURATION:
378
set_lum_sat(&casefactory, factor, dst_rgb, src_rgb, dst_rgb);
380
case BLEND_HSL_COLOR:
381
set_lum(&casefactory, factor, src_rgb, dst_rgb);
383
case BLEND_HSL_LUMINOSITY:
384
set_lum(&casefactory, factor, dst_rgb, src_rgb);
387
unreachable("not real cases");
391
casefactory.emit(assign(factor, val));
393
casefactory.instructions = &iff->else_instructions;
397
* p1(As,Ad) = As*(1-Ad)
398
* p2(As,Ad) = Ad*(1-As)
400
ir_variable *p0 = f.make_temp(glsl_type::float_type, "__blend_p0");
401
ir_variable *p1 = f.make_temp(glsl_type::float_type, "__blend_p1");
402
ir_variable *p2 = f.make_temp(glsl_type::float_type, "__blend_p2");
404
f.emit(assign(p0, mul(src_alpha, dst_alpha)));
405
f.emit(assign(p1, mul(src_alpha, sub(imm1(1), dst_alpha))));
406
f.emit(assign(p2, mul(dst_alpha, sub(imm1(1), src_alpha))));
408
/* R = f(Rs',Rd')*p0(As,Ad) + Y*Rs'*p1(As,Ad) + Z*Rd'*p2(As,Ad)
409
* G = f(Gs',Gd')*p0(As,Ad) + Y*Gs'*p1(As,Ad) + Z*Gd'*p2(As,Ad)
410
* B = f(Bs',Bd')*p0(As,Ad) + Y*Bs'*p1(As,Ad) + Z*Bd'*p2(As,Ad)
411
* A = X*p0(As,Ad) + Y*p1(As,Ad) + Z*p2(As,Ad)
413
* <X, Y, Z> is always <1, 1, 1>, so we can ignore it.
415
* In vector form, this is:
416
* RGB = factor * p0 + Cs * p1 + Cd * p2
419
f.emit(assign(result,
420
add(add(mul(factor, p0), mul(src_rgb, p1)), mul(dst_rgb, p2)),
422
f.emit(assign(result, add(add(p0, p1), p2), WRITEMASK_W));
428
* Dereference var, or var[0] if it's an array.
430
// Creates a dereference of var, allocated from var's ralloc parent. When the
// dereference has an array type it is wrapped in an ir_dereference_array
// with constant index 0, so the result refers to the first element.
static ir_dereference *
430
deref_output(ir_variable *var)
433
void *mem_ctx = ralloc_parent(var);
435
ir_dereference *val = new(mem_ctx) ir_dereference_variable(var);
436
if (val->type->is_array()) {
437
ir_constant *index = new(mem_ctx) ir_constant(0);
438
val = new(mem_ctx) ir_dereference_array(val, index);
444
// Walks the instruction list sh->ir looking for an ir_function whose name is
// "main", and asks it for the signature matching an empty parameter list.
// Asserts that a signature was found.
static ir_function_signature *
445
get_main(gl_linked_shader *sh)
447
ir_function_signature *sig = NULL;
448
/* We can't use _mesa_get_main_function_signature() because we don't
449
* have a symbol table at this point. Just go find main() by hand.
451
foreach_in_list(ir_instruction, ir, sh->ir) {
452
ir_function *f = ir->as_function();
453
if (f && strcmp(f->name, "main") == 0) {
454
exec_list void_parameters;
455
sig = f->matching_signature(NULL, &void_parameters, false);
459
assert(sig != NULL); /* main() must exist */
464
// Entry point of the lowering pass. Asserts that sh is a fragment shader
// and tests whether the program uses any advanced blend modes. Steps visible
// here:
//   - do_lower_jumps() is run so that main() has a single exit point.
//   - A hidden vec4 variable `fb` is created at location FRAG_RESULT_DATA0,
//     flagged read_only and fb_fetch_output; `coherent` is stored in its
//     memory_coherent flag.
//   - A hidden uint variable "gl_AdvancedBlendModeMESA" is created with one
//     state slot whose first token is STATE_ADVANCED_BLENDING_MODE and whose
//     other tokens are zeroed.
//   - Both variables are pushed onto the head of sh->ir.
//   - Shader outputs located at FRAG_RESULT_DATA0 or FRAG_RESULT_COLOR are
//     recorded per component in outputs[], indexed from location_frac.
//   - The blend source is either the single 4-component output or a vec4
//     assembled from the individual components, using constants 0, 0, 0, 1
//     for the missing ones.
//   - calc_blend_result() emits the blend math into main()'s body, and its
//     result is assigned back to the original outputs one component at a
//     time.
//   - validate_ir_tree() is run on the final IR.
// NOTE(review): the return type, the statement guarded by the
// advanced_blend_modes test, and several braces and conditions are not
// visible in this view -- check them against the full file.
lower_blend_equation_advanced(struct gl_linked_shader *sh, bool coherent)
466
assert(sh->Stage == MESA_SHADER_FRAGMENT);
468
if (sh->Program->info.fs.advanced_blend_modes == 0)
471
/* Lower early returns in main() so there's a single exit point
472
* where we can insert our lowering code.
474
do_lower_jumps(sh->ir, false, false, true, false, false);
476
void *mem_ctx = ralloc_parent(sh->ir);
478
ir_variable *fb = new(mem_ctx) ir_variable(glsl_type::vec4_type,
481
fb->data.location = FRAG_RESULT_DATA0;
482
fb->data.read_only = 1;
483
fb->data.fb_fetch_output = 1;
484
fb->data.memory_coherent = coherent;
485
fb->data.how_declared = ir_var_hidden;
487
ir_variable *mode = new(mem_ctx) ir_variable(glsl_type::uint_type,
488
"gl_AdvancedBlendModeMESA",
490
mode->data.how_declared = ir_var_hidden;
491
mode->allocate_state_slots(1);
492
ir_state_slot *slot0 = &mode->get_state_slots()[0];
493
slot0->swizzle = SWIZZLE_XXXX;
494
slot0->tokens[0] = STATE_ADVANCED_BLENDING_MODE;
495
for (int i = 1; i < STATE_LENGTH; i++)
496
slot0->tokens[i] = 0;
498
sh->ir->push_head(fb);
499
sh->ir->push_head(mode);
501
/* Gather any output variables referring to render target 0.
503
* ARB_enhanced_layouts irritatingly allows the shader to specify
504
* multiple output variables for the same render target, each of
505
* which writes a subset of the components, starting at location_frac.
506
* The variables can't overlap, thankfully.
508
ir_variable *outputs[4] = { NULL, NULL, NULL, NULL };
509
foreach_in_list(ir_instruction, ir, sh->ir) {
510
ir_variable *var = ir->as_variable();
511
if (!var || var->data.mode != ir_var_shader_out)
514
if (var->data.location == FRAG_RESULT_DATA0 ||
515
var->data.location == FRAG_RESULT_COLOR) {
516
const int components = var->type->without_array()->vector_elements;
518
for (int i = 0; i < components; i++) {
519
outputs[var->data.location_frac + i] = var;
524
/* Combine values written to outputs into a single RGBA blend source.
525
* We assign <0, 0, 0, 1> to any components with no corresponding output.
527
ir_rvalue *blend_source;
528
if (outputs[0] && outputs[0]->type->without_array()->vector_elements == 4) {
529
blend_source = deref_output(outputs[0]);
531
ir_rvalue *blend_comps[4];
532
for (int i = 0; i < 4; i++) {
533
ir_variable *var = outputs[i];
535
blend_comps[i] = swizzle(deref_output(outputs[i]),
536
i - outputs[i]->data.location_frac, 1);
538
blend_comps[i] = new(mem_ctx) ir_constant(i < 3 ? 0.0f : 1.0f);
543
new(mem_ctx) ir_expression(ir_quadop_vector, glsl_type::vec4_type,
544
blend_comps[0], blend_comps[1],
545
blend_comps[2], blend_comps[3]);
548
ir_function_signature *main = get_main(sh);
549
ir_factory f(&main->body, mem_ctx);
551
ir_variable *result_dest =
552
calc_blend_result(f, mode, fb, blend_source,
553
sh->Program->info.fs.advanced_blend_modes);
555
/* Copy the result back to the original values. It would be simpler
556
* to demote the program's output variables, and create a new vec4
557
* output for our result, but this pass runs before we create the
558
* ARB_program_interface_query resource list. So we have to leave
559
* the original outputs in place and use them.
561
for (int i = 0; i < 4; i++) {
565
f.emit(assign(deref_output(outputs[i]), swizzle(result_dest, i, 1),
569
validate_ir_tree(sh->ir);