3
* Copyright (c) 2018 Collabora LTD
5
* Author: Gert Wollny <gert.wollny@collabora.com>
7
* Permission is hereby granted, free of charge, to any person obtaining a
8
* copy of this software and associated documentation files (the "Software"),
9
* to deal in the Software without restriction, including without limitation
10
* on the rights to use, copy, modify, merge, publish, distribute, sub
11
* license, and/or sell copies of the Software, and to permit persons to whom
12
* the Software is furnished to do so, subject to the following conditions:
14
* The above copyright notice and this permission notice (including the next
15
* paragraph) shall be included in all copies or substantial portions of the
18
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24
* USE OR OTHER DEALINGS IN THE SOFTWARE.
28
#include "sfn_emitaluinstruction.h"
29
#include "sfn_debug.h"
31
#include "gallium/drivers/r600/r600_shader.h"
37
// Constructs the ALU emitter; the shader processor reference is forwarded
// to the EmitInstruction initializer (presumably the base class).
// NOTE(review): the constructor body is not visible in this excerpt.
EmitAluInstruction::EmitAluInstruction(ShaderFromNirProcessor& processor):
38
EmitInstruction (processor)
43
// Entry point for emitting a single NIR ALU instruction.
//
// The generic nir_instr is converted to a nir_alu_instr, a log line is
// written, and the opcode is mapped to the matching emit_* helper whose
// bool result is returned directly.
//
// NOTE(review): the switch headers (presumably on instr.op), any default
// branches and the closing braces are not visible in this excerpt.
bool EmitAluInstruction::do_emit(nir_instr* ir)
45
const nir_alu_instr& instr = *nir_instr_as_alu(ir);
47
r600::sfn_log << SfnLog::instr << "emit '"
49
<< " bitsize: " << static_cast<int>(instr.dest.dest.ssa.bit_size)
50
<< "' (" << __func__ << ")\n";
54
// On CAYMAN the ops listed here go through emit_alu_cm_trig; further
// below the same opcodes map to emit_alu_trans_op1.
if (get_chip_class() == CAYMAN) {
56
case nir_op_fcos_r600: return emit_alu_cm_trig(instr, op1_cos);
57
case nir_op_fexp2: return emit_alu_cm_trig(instr, op1_exp_ieee);
58
case nir_op_flog2: return emit_alu_cm_trig(instr, op1_log_clamped);
59
case nir_op_frcp: return emit_alu_cm_trig(instr, op1_recip_ieee);
60
case nir_op_frsq: return emit_alu_cm_trig(instr, op1_recipsqrt_ieee1);
61
case nir_op_fsin_r600: return emit_alu_cm_trig(instr, op1_sin);
62
case nir_op_fsqrt: return emit_alu_cm_trig(instr, op1_sqrt_ieee);
69
/* These are in the ALU instruction list, but they should be texture instructions */
70
case nir_op_b2b1: return emit_mov(instr);
71
case nir_op_b2b32: return emit_mov(instr);
72
case nir_op_b2f32: return emit_alu_b2f(instr);
73
case nir_op_b2i32: return emit_b2i32(instr);
74
case nir_op_b32all_fequal2: return emit_any_all_fcomp2(instr, op2_sete_dx10, true);
75
case nir_op_b32all_fequal3: return emit_any_all_fcomp(instr, op2_sete, 3, true);
76
case nir_op_b32all_fequal4: return emit_any_all_fcomp(instr, op2_sete, 4, true);
77
case nir_op_b32all_iequal2: return emit_any_all_icomp(instr, op2_sete_int, 2, true);
78
case nir_op_b32all_iequal3: return emit_any_all_icomp(instr, op2_sete_int, 3, true);
79
case nir_op_b32all_iequal4: return emit_any_all_icomp(instr, op2_sete_int, 4, true);
80
case nir_op_b32any_fnequal2: return emit_any_all_fcomp2(instr, op2_setne_dx10, false);
81
case nir_op_b32any_fnequal3: return emit_any_all_fcomp(instr, op2_setne, 3, false);
82
case nir_op_b32any_fnequal4: return emit_any_all_fcomp(instr, op2_setne, 4, false);
83
case nir_op_b32any_inequal2: return emit_any_all_icomp(instr, op2_setne_int, 2, false);
84
case nir_op_b32any_inequal3: return emit_any_all_icomp(instr, op2_setne_int, 3, false);
85
case nir_op_b32any_inequal4: return emit_any_all_icomp(instr, op2_setne_int, 4, false);
86
case nir_op_b32csel: return emit_alu_op3(instr, op3_cnde_int, {0, 2, 1});
87
case nir_op_ball_fequal2: return emit_any_all_fcomp2(instr, op2_sete_dx10, true);
88
case nir_op_ball_fequal3: return emit_any_all_fcomp(instr, op2_sete, 3, true);
89
case nir_op_ball_fequal4: return emit_any_all_fcomp(instr, op2_sete, 4, true);
90
case nir_op_ball_iequal2: return emit_any_all_icomp(instr, op2_sete_int, 2, true);
91
case nir_op_ball_iequal3: return emit_any_all_icomp(instr, op2_sete_int, 3, true);
92
case nir_op_ball_iequal4: return emit_any_all_icomp(instr, op2_sete_int, 4, true);
93
case nir_op_bany_fnequal2: return emit_any_all_fcomp2(instr, op2_setne_dx10, false);
94
case nir_op_bany_fnequal3: return emit_any_all_fcomp(instr, op2_setne, 3, false);
95
case nir_op_bany_fnequal4: return emit_any_all_fcomp(instr, op2_setne, 4, false);
96
case nir_op_bany_inequal2: return emit_any_all_icomp(instr, op2_setne_int, 2, false);
97
case nir_op_bany_inequal3: return emit_any_all_icomp(instr, op2_setne_int, 3, false);
98
case nir_op_bany_inequal4: return emit_any_all_icomp(instr, op2_setne_int, 4, false);
99
case nir_op_bcsel: return emit_alu_op3(instr, op3_cnde_int, {0, 2, 1});
100
case nir_op_bfm: return emit_alu_op2_int(instr, op2_bfm_int);
101
case nir_op_bit_count: return emit_alu_op1(instr, op1_bcnt_int);
103
case nir_op_bitfield_reverse: return emit_alu_op1(instr, op1_bfrev_int);
104
case nir_op_bitfield_select: return emit_alu_op3(instr, op3_bfi_int);
105
case nir_op_cube_r600: return emit_cube(instr);
106
case nir_op_f2b1: return emit_alu_i2orf2_b1(instr, op2_setne_dx10);
107
case nir_op_f2b32: return emit_alu_f2b32(instr);
108
case nir_op_f2i32: return emit_alu_f2i32_or_u32(instr, op1_flt_to_int);
109
case nir_op_f2u32: return emit_alu_f2i32_or_u32(instr, op1_flt_to_uint);
110
case nir_op_fabs: return emit_alu_op1(instr, op1_mov, {1 << alu_src0_abs});
111
case nir_op_fadd: return emit_alu_op2(instr, op2_add);
112
case nir_op_fceil: return emit_alu_op1(instr, op1_ceil);
113
case nir_op_fcos_r600: return emit_alu_trans_op1(instr, op1_cos);
114
case nir_op_fcsel: return emit_alu_op3(instr, op3_cnde, {0, 2, 1});
115
case nir_op_fcsel_ge: return emit_alu_op3(instr, op3_cndge, {0, 1, 2});
116
case nir_op_fcsel_gt: return emit_alu_op3(instr, op3_cndgt, {0, 1, 2});
118
/* These are in the ALU instruction list, but they should be texture instructions */
119
case nir_op_fddx: return emit_tex_fdd(instr, TexInstruction::get_gradient_h, false);
120
case nir_op_fddx_coarse: return emit_tex_fdd(instr, TexInstruction::get_gradient_h, false);
121
case nir_op_fddx_fine: return emit_tex_fdd(instr, TexInstruction::get_gradient_h, true);
122
case nir_op_fddy: return emit_tex_fdd(instr,TexInstruction::get_gradient_v, false);
123
// NOTE(review): fddy_coarse shares the fddy_fine case and so passes 'true'
// as the last argument, whereas fddx_coarse above passes 'false'. Confirm
// that this asymmetry is intended.
case nir_op_fddy_coarse:
124
case nir_op_fddy_fine: return emit_tex_fdd(instr, TexInstruction::get_gradient_v, true);
125
case nir_op_fdot2: return emit_dot(instr, 2);
126
case nir_op_fdot3: return emit_dot(instr, 3);
127
case nir_op_fdot4: return emit_dot(instr, 4);
128
case nir_op_fdph: return emit_fdph(instr);
129
case nir_op_feq32: return emit_alu_op2(instr, op2_sete_dx10);
130
case nir_op_feq: return emit_alu_op2(instr, op2_sete_dx10);
131
case nir_op_fexp2: return emit_alu_trans_op1(instr, op1_exp_ieee);
132
case nir_op_ffloor: return emit_alu_op1(instr, op1_floor);
133
case nir_op_ffma: return emit_alu_op3(instr, op3_muladd_ieee);
134
case nir_op_ffract: return emit_alu_op1(instr, op1_fract);
135
case nir_op_fge32: return emit_alu_op2(instr, op2_setge_dx10);
136
case nir_op_fge: return emit_alu_op2(instr, op2_setge_dx10);
137
case nir_op_find_lsb: return emit_alu_op1(instr, op1_ffbl_int);
138
case nir_op_flog2: return emit_alu_trans_op1(instr, op1_log_clamped);
139
// "less than" is expressed as "greater than" with the operands reversed.
case nir_op_flt32: return emit_alu_op2(instr, op2_setgt_dx10, op2_opt_reverse);
140
case nir_op_flt: return emit_alu_op2(instr, op2_setgt_dx10, op2_opt_reverse);
141
case nir_op_fmax: return emit_alu_op2(instr, op2_max_dx10);
142
case nir_op_fmin: return emit_alu_op2(instr, op2_min_dx10);
143
case nir_op_fmul: return emit_alu_op2(instr, op2_mul_ieee);
144
case nir_op_fneg: return emit_alu_op1(instr, op1_mov, {1 << alu_src0_neg});
145
case nir_op_fneu32: return emit_alu_op2(instr, op2_setne_dx10);
146
case nir_op_fneu: return emit_alu_op2(instr, op2_setne_dx10);
147
case nir_op_frcp: return emit_alu_trans_op1(instr, op1_recip_ieee);
148
case nir_op_fround_even: return emit_alu_op1(instr, op1_rndne);
149
case nir_op_frsq: return emit_alu_trans_op1(instr, op1_recipsqrt_ieee1);
150
case nir_op_fsat: return emit_alu_op1(instr, op1_mov, {1 << alu_dst_clamp});
151
case nir_op_fsin_r600: return emit_alu_trans_op1(instr, op1_sin);
152
case nir_op_fsqrt: return emit_alu_trans_op1(instr, op1_sqrt_ieee);
153
// Subtraction is emitted as an add with the second source negated.
case nir_op_fsub: return emit_alu_op2(instr, op2_add, op2_opt_neg_src1);
154
case nir_op_ftrunc: return emit_alu_op1(instr, op1_trunc);
155
case nir_op_i2b1: return emit_alu_i2orf2_b1(instr, op2_setne_int);
156
case nir_op_i2b32: return emit_alu_i2orf2_b1(instr, op2_setne_int);
157
case nir_op_i2f32: return emit_alu_trans_op1(instr, op1_int_to_flt);
158
case nir_op_iadd: return emit_alu_op2_int(instr, op2_add_int);
159
case nir_op_iand: return emit_alu_op2_int(instr, op2_and_int);
160
case nir_op_ibfe: return emit_alu_op3(instr, op3_bfe_int);
161
case nir_op_i32csel_ge: return emit_alu_op3(instr, op3_cndge_int, {0, 1, 2});
162
case nir_op_i32csel_gt: return emit_alu_op3(instr, op3_cndgt_int, {0, 1, 2});
163
case nir_op_ieq32: return emit_alu_op2_int(instr, op2_sete_int);
164
case nir_op_ieq: return emit_alu_op2_int(instr, op2_sete_int);
165
case nir_op_ifind_msb_rev: return emit_alu_op1(instr, op1_ffbh_int);
166
case nir_op_ige32: return emit_alu_op2_int(instr, op2_setge_int);
167
case nir_op_ige: return emit_alu_op2_int(instr, op2_setge_int);
168
case nir_op_ilt32: return emit_alu_op2_int(instr, op2_setgt_int, op2_opt_reverse);
169
case nir_op_ilt: return emit_alu_op2_int(instr, op2_setgt_int, op2_opt_reverse);
170
case nir_op_imax: return emit_alu_op2_int(instr, op2_max_int);
171
case nir_op_imin: return emit_alu_op2_int(instr, op2_min_int);
172
case nir_op_imul: return emit_alu_trans_op2(instr, op2_mullo_int);
173
case nir_op_imul_high: return emit_alu_trans_op2(instr, op2_mulhi_int);
174
case nir_op_ine32: return emit_alu_op2_int(instr, op2_setne_int);
175
case nir_op_ine: return emit_alu_op2_int(instr, op2_setne_int);
176
case nir_op_ineg: return emit_alu_ineg(instr);
177
case nir_op_inot: return emit_alu_op1(instr, op1_not_int);
178
case nir_op_ior: return emit_alu_op2_int(instr, op2_or_int);
179
case nir_op_ishl: return emit_alu_op2_int(instr, op2_lshl_int);
180
case nir_op_ishr: return emit_alu_op2_int(instr, op2_ashr_int);
181
case nir_op_isub: return emit_alu_op2_int(instr, op2_sub_int);
182
case nir_op_ixor: return emit_alu_op2_int(instr, op2_xor_int);
183
case nir_op_mov:return emit_mov(instr);
184
case nir_op_pack_64_2x32_split: return emit_pack_64_2x32_split(instr);
185
case nir_op_pack_half_2x16_split: return emit_pack_32_2x16_split(instr);
186
case nir_op_slt: return emit_alu_op2(instr, op2_setgt, op2_opt_reverse);
187
case nir_op_sge: return emit_alu_op2(instr, op2_setge);
188
case nir_op_u2f32: return emit_alu_trans_op1(instr, op1_uint_to_flt);
189
case nir_op_ubfe: return emit_alu_op3(instr, op3_bfe_uint);
190
case nir_op_ufind_msb_rev: return emit_alu_op1(instr, op1_ffbh_uint);
191
case nir_op_uge32: return emit_alu_op2_int(instr, op2_setge_uint);
192
case nir_op_uge: return emit_alu_op2_int(instr, op2_setge_uint);
193
case nir_op_ult32: return emit_alu_op2_int(instr, op2_setgt_uint, op2_opt_reverse);
194
case nir_op_ult: return emit_alu_op2_int(instr, op2_setgt_uint, op2_opt_reverse);
195
case nir_op_umad24: return emit_alu_op3(instr, op3_muladd_uint24, {0, 1, 2});
196
case nir_op_umax: return emit_alu_op2_int(instr, op2_max_uint);
197
case nir_op_umin: return emit_alu_op2_int(instr, op2_min_uint);
198
case nir_op_umul24: return emit_alu_op2(instr, op2_mul_uint24);
199
case nir_op_umul_high: return emit_alu_trans_op2(instr, op2_mulhi_uint);
200
case nir_op_unpack_64_2x32_split_x: return emit_unpack_64_2x32_split(instr, 0);
201
case nir_op_unpack_64_2x32_split_y: return emit_unpack_64_2x32_split(instr, 1);
202
case nir_op_unpack_half_2x16_split_x: return emit_unpack_32_2x16_split_x(instr);
203
case nir_op_unpack_half_2x16_split_y: return emit_unpack_32_2x16_split_y(instr);
204
case nir_op_ushr: return emit_alu_op2_int(instr, op2_lshr_int);
205
case nir_op_vec2: return emit_create_vec(instr, 2);
206
case nir_op_vec3: return emit_create_vec(instr, 3);
207
case nir_op_vec4: return emit_create_vec(instr, 4);
213
// Loads the source operands of 'instr' into the m_src table.
//
// For every input i of the opcode and every component c below
// num_src_comp(instr), m_src[i][c] is set to from_nir(instr.src[i], c) and
// the value is written to the register log. nir_op_fdph additionally loads
// component 3 of source 1. Finally split_constants() is run on the loaded
// sources.
void EmitAluInstruction::preload_src(const nir_alu_instr& instr)
215
const nir_op_info *op_info = &nir_op_infos[instr.op];
216
// The code below indexes m_src by input number; at most 4 inputs are expected.
assert(op_info->num_inputs <= 4);
218
unsigned nsrc_comp = num_src_comp(instr);
219
sfn_log << SfnLog::reg << "Preload:\n";
220
for (unsigned i = 0; i < op_info->num_inputs; ++i) {
221
for (unsigned c = 0; c < nsrc_comp; ++c) {
222
m_src[i][c] = from_nir(instr.src[i], c);
223
sfn_log << SfnLog::reg << " " << *m_src[i][c];
226
sfn_log << SfnLog::reg << "\n";
228
// fdph reads a fourth component of its second source (see emit_fdph).
if (instr.op == nir_op_fdph) {
229
m_src[1][3] = from_nir(instr.src[1], 3);
230
sfn_log << SfnLog::reg << " extra:" << *m_src[1][3] << "\n";
233
split_constants(instr, nsrc_comp);
236
// Returns how many source components have to be preloaded for 'instr'.
//
// The case labels form three groups: the *2 comparison opcodes plus
// unpack_64_2x32_split_y, the *3 comparison opcodes plus cube_r600, and the
// *4 comparison opcodes.
// NOTE(review): the switch header, further labels and the values returned
// for each group are not visible in this excerpt (presumably 2, 3 and 4).
// The only visible return is the destination's component count, presumably
// the default path.
unsigned EmitAluInstruction::num_src_comp(const nir_alu_instr& instr)
240
case nir_op_bany_inequal2:
241
case nir_op_ball_iequal2:
242
case nir_op_bany_fnequal2:
243
case nir_op_ball_fequal2:
244
case nir_op_b32any_inequal2:
245
case nir_op_b32all_iequal2:
246
case nir_op_b32any_fnequal2:
247
case nir_op_b32all_fequal2:
248
case nir_op_unpack_64_2x32_split_y:
252
case nir_op_bany_inequal3:
253
case nir_op_ball_iequal3:
254
case nir_op_bany_fnequal3:
255
case nir_op_ball_fequal3:
256
case nir_op_b32any_inequal3:
257
case nir_op_b32all_iequal3:
258
case nir_op_b32any_fnequal3:
259
case nir_op_b32all_fequal3:
260
case nir_op_cube_r600:
265
case nir_op_bany_inequal4:
266
case nir_op_ball_iequal4:
267
case nir_op_bany_fnequal4:
268
case nir_op_ball_fequal4:
269
case nir_op_b32any_inequal4:
270
case nir_op_b32all_iequal4:
271
case nir_op_b32any_fnequal4:
272
case nir_op_b32all_fequal4:
281
return nir_dest_num_components(instr.dest.dest);
286
// Emits the four op2_cube instructions for nir_op_cube_r600.
//
// Destination channel i reads channels src0_chan[i] and src1_chan[i] of
// source 0; both operands are fetched directly with from_nir rather than
// from m_src. alu_last_instr is set on 'ir' afterwards.
// NOTE(review): the loop's closing brace and the return statement are not
// visible; presumably the flag lands on the fourth instruction.
bool EmitAluInstruction::emit_cube(const nir_alu_instr& instr)
288
AluInstruction *ir = nullptr;
289
// Per-destination-channel source channel selection for the two operands.
const uint16_t src0_chan[4] = {2, 2, 0, 1};
290
const uint16_t src1_chan[4] = {1, 0, 2, 2};
292
for (int i = 0; i < 4; ++i) {
293
ir = new AluInstruction(op2_cube, from_nir(instr.dest, i),
294
from_nir(instr.src[0], src0_chan[i]),
295
from_nir(instr.src[0], src1_chan[i]), {alu_write});
296
emit_instruction(ir);
298
ir->set_flag(alu_last_instr);
302
// Copies conflicting constant sources of 'instr' into temporary registers.
//
// The first component of each source is inspected; sources of type
// Value::kconst are collected in 'c'. Every further constant whose sel() or
// kcache_bank() differs from the first constant's is copied, for nsrc_comp
// components, into a fresh temp vec4 with op1_mov, and m_src is repointed
// at the temp.
//
// NOTE(review): the declaration of 'nconst', the bookkeeping that fills
// 'idx', the early returns and the closing braces are not visible in this
// excerpt.
void EmitAluInstruction::split_constants(const nir_alu_instr& instr, unsigned nsrc_comp)
304
const nir_op_info *op_info = &nir_op_infos[instr.op];
305
// Guard for ops with fewer than two inputs (guarded statement not visible).
if (op_info->num_inputs < 2)
309
std::array<const UniformValue *,4> c;
310
std::array<int,4> idx;
311
for (unsigned i = 0; i < op_info->num_inputs; ++i) {
312
PValue& src = m_src[i][0];
314
sfn_log << SfnLog::reg << "Split test " << *src;
316
if (src->type() == Value::kconst) {
317
c[nconst] = static_cast<const UniformValue *>(src.get());
319
sfn_log << SfnLog::reg << " is constant " << i;
321
sfn_log << SfnLog::reg << "\n";
327
// The first collected constant is the reference the others are compared to.
unsigned sel = c[0]->sel();
328
unsigned kcache = c[0]->kcache_bank();
329
sfn_log << SfnLog::reg << "split " << nconst << " constants, sel[0] = " << sel; ;
331
for (int i = 1; i < nconst; ++i) {
332
sfn_log << "sel[" << i << "] = " << c[i]->sel() << "\n";
333
if (c[i]->sel() != sel || c[i]->kcache_bank() != kcache) {
334
AluInstruction *ir = nullptr;
335
auto v = get_temp_vec4();
336
for (unsigned k = 0; k < nsrc_comp; ++k) {
337
ir = new AluInstruction(op1_mov, v[k], m_src[idx[i]][k], {write});
338
emit_instruction(ir);
339
// From here on the instruction reads the temp copy instead of the constant.
m_src[idx[i]][k] = v[k];
346
// Emits op1_not_int for every destination component enabled in write_mask.
//
// A negate or abs modifier on source 0 is reported on std::cerr.
// NOTE(review): what follows the message (presumably an early return), the
// source operand/flags of the instruction and the return statement are not
// visible. do_emit maps nir_op_inot to emit_alu_op1, not to this function.
bool EmitAluInstruction::emit_alu_inot(const nir_alu_instr& instr)
348
if (instr.src[0].negate || instr.src[0].abs) {
349
std::cerr << "source modifiers not supported with int ops\n";
353
AluInstruction *ir = nullptr;
354
for (int i = 0; i < 4 ; ++i) {
355
if (instr.dest.write_mask & (1 << i)){
356
ir = new AluInstruction(op1_not_int, from_nir(instr.dest, i),
358
emit_instruction(ir);
365
// Emits a one-source ALU op for every destination component in write_mask.
//
// 'flags' lets the caller add modifiers on top of those on the NIR
// instruction: alu_src0_abs and alu_dst_clamp are OR-ed with the NIR
// abs/saturate bits, while alu_src0_neg is XOR-ed with the NIR negate bit
// (so a negated source combined with a negating flag cancels out).
// NOTE(review): the source operand line, the closing braces and the return
// statement are not visible in this excerpt.
bool EmitAluInstruction::emit_alu_op1(const nir_alu_instr& instr, EAluOp opcode,
366
const AluOpFlags& flags)
368
AluInstruction *ir = nullptr;
369
for (int i = 0; i < 4 ; ++i) {
370
if (instr.dest.write_mask & (1 << i)){
371
ir = new AluInstruction(opcode, from_nir(instr.dest, i),
374
if (flags.test(alu_src0_abs) || instr.src[0].abs)
375
ir->set_flag(alu_src0_abs);
377
if (instr.src[0].negate ^ flags.test(alu_src0_neg))
378
ir->set_flag(alu_src0_neg);
380
if (flags.test(alu_dst_clamp) || instr.dest.saturate)
381
ir->set_flag(alu_dst_clamp);
383
emit_instruction(ir);
391
// Handles a move instruction.
//
// When destination and source are both SSA values and there is no abs,
// negate or saturate modifier, each written component goes through
// inject_register(), accumulating its result in 'result'. The visible
// fallback emits a real op1_mov via emit_alu_op1.
// NOTE(review): the declaration and return of 'result' and the remaining
// inject_register arguments are not visible in this excerpt.
bool EmitAluInstruction::emit_mov(const nir_alu_instr& instr)
393
/* If the op is a plain move between SSA values we can just forward
394
* the register reference to the original register */
395
if (instr.dest.dest.is_ssa && instr.src[0].src.is_ssa &&
396
!instr.src[0].abs && !instr.src[0].negate && !instr.dest.saturate) {
398
for (int i = 0; i < 4 ; ++i) {
399
if (instr.dest.write_mask & (1 << i)){
400
result &= inject_register(instr.dest.dest.ssa.index, i,
406
return emit_alu_op1(instr, op1_mov);
410
// Emits a one-source op that needs special slot handling.
//
// CAYMAN path: the op is issued in 3 slots, or 4 when component 3 is in
// write_mask. Slots whose component is in write_mask read m_src[0][i] and
// are built with 'write'; the others read m_src[0][0] and are built with
// 'empty'. The final slot gets alu_last_instr.
// Other path: one instruction per written component, each built with
// 'last_write'.
// In both paths abs ('absolute' or the NIR abs bit), negate and saturate
// are translated into instruction flags.
//
// NOTE(review): the rest of the signature (it must declare 'absolute'), the
// else branch header, closing braces and the return statement are not
// visible. 'src_idx' is not used in any visible line.
bool EmitAluInstruction::emit_alu_trans_op1(const nir_alu_instr& instr, EAluOp opcode,
413
AluInstruction *ir = nullptr;
414
std::set<int> src_idx;
416
if (get_chip_class() == CAYMAN) {
417
int last_slot = (instr.dest.write_mask & 0x8) ? 4 : 3;
418
for (int i = 0; i < last_slot; ++i) {
419
bool write_comp = instr.dest.write_mask & (1 << i);
420
ir = new AluInstruction(opcode, from_nir(instr.dest, i),
421
m_src[0][write_comp ? i : 0], write_comp ? write : empty);
422
if (absolute || instr.src[0].abs) ir->set_flag(alu_src0_abs);
423
if (instr.src[0].negate) ir->set_flag(alu_src0_neg);
424
if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
426
if (i == (last_slot - 1)) ir->set_flag(alu_last_instr);
428
emit_instruction(ir);
431
for (int i = 0; i < 4 ; ++i) {
432
if (instr.dest.write_mask & (1 << i)){
433
ir = new AluInstruction(opcode, from_nir(instr.dest, i),
434
m_src[0][i], last_write);
435
if (absolute || instr.src[0].abs) ir->set_flag(alu_src0_abs);
436
if (instr.src[0].negate) ir->set_flag(alu_src0_neg);
437
if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
438
emit_instruction(ir);
445
// CAYMAN-only emission of the ops that do_emit routes here
// (cos, exp2, log2, rcp, rsq, sin, sqrt).
//
// For each destination component j the opcode is issued in 'last_slot'
// slots (3, or 4 when component 3 is in write_mask), all reading
// m_src[0][j]. Only the slot with i == j is built with 'write', and only if
// j is in write_mask; the others are built with 'empty'. The final slot of
// each group gets alu_last_instr.
// NOTE(review): closing braces and the return statement are not visible.
// 'src_idx' is not used in any visible line.
bool EmitAluInstruction::emit_alu_cm_trig(const nir_alu_instr& instr, EAluOp opcode)
447
AluInstruction *ir = nullptr;
448
std::set<int> src_idx;
450
unsigned last_slot = (instr.dest.write_mask & 0x8) ? 4 : 3;
452
for (unsigned j = 0; j < nir_dest_num_components(instr.dest.dest); ++j) {
453
for (unsigned i = 0; i < last_slot; ++i) {
454
bool write_comp = instr.dest.write_mask & (1 << j) && (i == j);
455
ir = new AluInstruction(opcode, from_nir(instr.dest, i),
456
m_src[0][j], write_comp ? write : empty);
457
if (instr.src[0].abs) ir->set_flag(alu_src0_abs);
458
if (instr.src[0].negate) ir->set_flag(alu_src0_neg);
459
if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
461
if (i == (last_slot - 1)) ir->set_flag(alu_last_instr);
463
emit_instruction(ir);
470
// Emits a float to signed/unsigned integer conversion ('op' is
// op1_flt_to_int or op1_flt_to_uint according to do_emit).
//
// Chips before CAYMAN: a first pass emits op1_trunc from the source into
// the destination for each written component (carrying the abs/negate
// modifiers); a second pass applies 'op' to that value in place.
// Otherwise: 'op' is emitted directly from the source with the modifiers.
//
// NOTE(review): the statements guarded by the write-mask tests (presumably
// 'continue'), those guarded by 'op == op1_flt_to_uint', the else header
// and the return statement are not visible in this excerpt.
bool EmitAluInstruction::emit_alu_f2i32_or_u32(const nir_alu_instr& instr, EAluOp op)
472
AluInstruction *ir = nullptr;
474
if (get_chip_class() < CAYMAN) {
475
std::array<PValue, 4> v;
477
for (int i = 0; i < 4; ++i) {
478
if (!(instr.dest.write_mask & (1 << i)))
480
v[i] = from_nir(instr.dest, i);
481
ir = new AluInstruction(op1_trunc, v[i], m_src[0][i], {alu_write});
482
if (instr.src[0].abs) ir->set_flag(alu_src0_abs);
483
if (instr.src[0].negate) ir->set_flag(alu_src0_neg);
484
emit_instruction(ir);
488
for (int i = 0; i < 4; ++i) {
489
if (!(instr.dest.write_mask & (1 << i)))
491
ir = new AluInstruction(op, v[i], v[i], {alu_write});
492
emit_instruction(ir);
493
if (op == op1_flt_to_uint)
498
for (int i = 0; i < 4; ++i) {
499
if (!(instr.dest.write_mask & (1 << i)))
501
ir = new AluInstruction(op, from_nir(instr.dest, i), m_src[0][i], {alu_write});
502
if (instr.src[0].abs) ir->set_flag(alu_src0_abs);
503
if (instr.src[0].negate) ir->set_flag(alu_src0_neg);
504
emit_instruction(ir);
505
if (op == op1_flt_to_uint)
514
// Emits float -> 32-bit boolean: for each written component the source is
// compared against the literal 0.0f with op2_setne_dx10.
// NOTE(review): closing braces, any alu_last_instr handling and the return
// statement are not visible in this excerpt.
bool EmitAluInstruction::emit_alu_f2b32(const nir_alu_instr& instr)
516
AluInstruction *ir = nullptr;
517
for (int i = 0; i < 4 ; ++i) {
518
if (instr.dest.write_mask & (1 << i)){
519
ir = new AluInstruction(op2_setne_dx10, from_nir(instr.dest, i),
520
m_src[0][i], literal(0.0f), write);
521
emit_instruction(ir);
528
// Emits boolean -> 32-bit integer: each written component is the bitwise
// AND (op2_and_int) of the source with Value::one_i.
// NOTE(review): the statement guarded by the write-mask test (presumably
// 'continue'), closing braces and the return statement are not visible.
bool EmitAluInstruction::emit_b2i32(const nir_alu_instr& instr)
530
AluInstruction *ir = nullptr;
531
for (int i = 0; i < 4 ; ++i) {
532
if (!(instr.dest.write_mask & (1 << i)))
535
ir = new AluInstruction(op2_and_int, from_nir(instr.dest, i),
536
m_src[0][i], Value::one_i, write);
537
emit_instruction(ir);
544
// Emits nir_op_pack_64_2x32_split as up to two op1_mov instructions, one
// per destination component 0 and 1 that is enabled in write_mask, and sets
// alu_last_instr on 'ir' afterwards.
// NOTE(review): the mov's source operand line, the statement guarded by the
// write-mask test and the return statement are not visible in this excerpt.
bool EmitAluInstruction::emit_pack_64_2x32_split(const nir_alu_instr& instr)
546
AluInstruction *ir = nullptr;
547
for (unsigned i = 0; i < 2; ++i) {
548
if (!(instr.dest.write_mask & (1 << i)))
550
ir = new AluInstruction(op1_mov, from_nir(instr.dest, i),
552
emit_instruction(ir);
554
ir->set_flag(alu_last_instr);
558
// Emits nir_op_unpack_64_2x32_split_{x,y}: a single op1_mov, built with
// 'last_write', from component 'comp' of source 0 (0 for _x, 1 for _y per
// do_emit) into destination component 0.
// NOTE(review): the return statement is not visible in this excerpt.
bool EmitAluInstruction::emit_unpack_64_2x32_split(const nir_alu_instr& instr, unsigned comp)
560
emit_instruction(new AluInstruction(op1_mov, from_nir(instr.dest, 0),
561
m_src[0][comp], last_write));
565
// Emits nir_op_vec2/3/4: destination component i (if in write_mask)
// receives component 0 of source i through an op1_mov, with the saturate
// modifier mapped to alu_dst_clamp.
//
// The sel() of every GPR source is collected in 'src_slot'; once three
// distinct values are present the current instruction gets alu_last_instr.
// A further alu_last_instr is set on 'ir' at the end.
// NOTE(review): the statements inside and after the size check (e.g.
// whether src_slot is reset), the guard on the final set_flag and the
// return statement are not visible in this excerpt.
bool EmitAluInstruction::emit_create_vec(const nir_alu_instr& instr, unsigned nc)
567
AluInstruction *ir = nullptr;
568
std::set<int> src_slot;
569
for(unsigned i = 0; i < nc; ++i) {
570
if (instr.dest.write_mask & (1 << i)){
571
auto src = m_src[i][0];
572
ir = new AluInstruction(op1_mov, from_nir(instr.dest, i), src, write);
573
if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
575
// FIXME: This is a rather crude approach to fix the problem that
576
// r600 can't read from four different slots of the same component
577
// here we check only for the register index
578
if (src->type() == Value::gpr)
579
src_slot.insert(src->sel());
580
if (src_slot.size() >= 3) {
582
ir->set_flag(alu_last_instr);
584
emit_instruction(ir);
588
ir->set_flag(alu_last_instr);
592
// Emits an n-component dot product (n = 2, 3 or 4 per do_emit) as four
// op2_dot4_ieee instructions.
//
// Slots 0..n-1 take component i of both sources and carry the NIR
// negate/abs/saturate modifiers; slots n..3 are fed Value::zero for both
// operands. Each slot is built with 'write' only if its component is in
// write_mask, otherwise with 'empty'. alu_last_instr is set on 'ir'
// afterwards.
// NOTE(review): closing braces and the return statement are not visible.
bool EmitAluInstruction::emit_dot(const nir_alu_instr& instr, int n)
594
const nir_alu_src& src0 = instr.src[0];
595
const nir_alu_src& src1 = instr.src[1];
597
AluInstruction *ir = nullptr;
598
for (int i = 0; i < n ; ++i) {
599
ir = new AluInstruction(op2_dot4_ieee, from_nir(instr.dest, i),
600
m_src[0][i], m_src[1][i],
601
instr.dest.write_mask & (1 << i) ? write : empty);
603
if (src0.negate) ir->set_flag(alu_src0_neg);
604
if (src0.abs) ir->set_flag(alu_src0_abs);
605
if (src1.negate) ir->set_flag(alu_src1_neg);
606
if (src1.abs) ir->set_flag(alu_src1_abs);
608
if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
609
emit_instruction(ir);
611
// Pad the unused slots with zero operands.
for (int i = n; i < 4 ; ++i) {
612
ir = new AluInstruction(op2_dot4_ieee, from_nir(instr.dest, i),
613
Value::zero, Value::zero,
614
instr.dest.write_mask & (1 << i) ? write : empty);
615
emit_instruction(ir);
619
ir->set_flag(alu_last_instr);
623
// Emits nir_op_fdph as four op2_dot4_ieee instructions.
//
// Slots 0..2 take component i of both sources with the NIR
// negate/abs/saturate modifiers. Slot 3 pairs Value::one_f with component 3
// of source 1 (loaded by preload_src) and carries only source 1's
// negate/abs modifiers. Each slot is built with 'write' only if its
// component is in write_mask. alu_last_instr is set on 'ir' afterwards.
// NOTE(review): closing braces and the return statement are not visible.
bool EmitAluInstruction::emit_fdph(const nir_alu_instr& instr)
625
const nir_alu_src& src0 = instr.src[0];
626
const nir_alu_src& src1 = instr.src[1];
628
AluInstruction *ir = nullptr;
629
for (int i = 0; i < 3 ; ++i) {
630
ir = new AluInstruction(op2_dot4_ieee, from_nir(instr.dest, i),
631
m_src[0][i], m_src[1][i],
632
instr.dest.write_mask & (1 << i) ? write : empty);
633
if (src0.negate) ir->set_flag(alu_src0_neg);
634
if (src0.abs) ir->set_flag(alu_src0_abs);
635
if (src1.negate) ir->set_flag(alu_src1_neg);
636
if (src1.abs) ir->set_flag(alu_src1_abs);
637
if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
638
emit_instruction(ir);
641
ir = new AluInstruction(op2_dot4_ieee, from_nir(instr.dest, 3), Value::one_f,
642
m_src[1][3], (instr.dest.write_mask) & (1 << 3) ? write : empty);
643
if (src1.negate) ir->set_flag(alu_src1_neg);
644
if (src1.abs) ir->set_flag(alu_src1_abs);
645
emit_instruction(ir);
647
ir->set_flag(alu_last_instr);
652
// Emits int/float -> boolean: for each written component the source is
// compared against Value::zero with 'op' (op2_setne_int or op2_setne_dx10
// per do_emit). alu_last_instr is set on 'ir' afterwards.
// NOTE(review): the final constructor argument line, closing braces, any
// guard on the final set_flag and the return statement are not visible.
bool EmitAluInstruction::emit_alu_i2orf2_b1(const nir_alu_instr& instr, EAluOp op)
654
AluInstruction *ir = nullptr;
655
for (int i = 0; i < 4 ; ++i) {
656
if (instr.dest.write_mask & (1 << i)) {
657
ir = new AluInstruction(op, from_nir(instr.dest, i),
658
m_src[0][i], Value::zero,
660
emit_instruction(ir);
664
ir->set_flag(alu_last_instr);
668
// Emits boolean -> float: each written component is the bitwise AND
// (op2_and_int) of the source with Value::one_f, with the NIR
// negate/abs/saturate modifiers mapped to instruction flags.
// NOTE(review): presumably this relies on a true boolean being all-ones so
// the AND yields the bit pattern of 1.0f; confirm against the boolean
// representation. Closing braces and the return statement are not visible.
bool EmitAluInstruction::emit_alu_b2f(const nir_alu_instr& instr)
670
AluInstruction *ir = nullptr;
671
for (int i = 0; i < 4 ; ++i) {
672
if (instr.dest.write_mask & (1 << i)){
673
ir = new AluInstruction(op2_and_int, from_nir(instr.dest, i),
674
m_src[0][i], Value::one_f, write);
675
if (instr.src[0].negate) ir->set_flag(alu_src0_neg);
676
if (instr.src[0].abs) ir->set_flag(alu_src0_abs);
677
if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
678
emit_instruction(ir);
682
ir->set_flag(alu_last_instr);
686
// Emits an integer "any"/"all" comparison over 'nc' components.
//
// Step 1: when both sources carry the same negate and abs modifiers, each
// of the nc component pairs is compared with 'op' into destination channel
// v[i]. A message on std::cerr covers the unsupported-modifier case.
// Step 2: the per-component results are reduced with op2_and_int ('all') or
// op2_or_int (any): first v[2*i] with v[2*i+1] for i < nc/2, then v[0] with
// v[2].
//
// NOTE(review): the else header and whatever follows the cerr message, the
// conditions guarding the reduction steps and the return statement are not
// visible in this excerpt.
bool EmitAluInstruction::emit_any_all_icomp(const nir_alu_instr& instr, EAluOp op, unsigned nc, bool all)
689
AluInstruction *ir = nullptr;
690
PValue v[4]; // this might need some additional temp register creation
691
for (unsigned i = 0; i < 4 ; ++i)
692
v[i] = from_nir(instr.dest, i);
694
EAluOp combine = all ? op2_and_int : op2_or_int;
696
/* For integers we can not use the modifiers, so this needs some emulation */
697
/* Should actually be lowered with NIR */
698
if (instr.src[0].negate == instr.src[1].negate &&
699
instr.src[0].abs == instr.src[1].abs) {
701
for (unsigned i = 0; i < nc ; ++i) {
702
ir = new AluInstruction(op, v[i], m_src[0][i], m_src[1][i], write);
703
emit_instruction(ir);
706
ir->set_flag(alu_last_instr);
708
std::cerr << "Negate in iequal/inequal not (yet) supported\n";
712
// Pairwise reduction of the per-component results.
for (unsigned i = 0; i < nc/2 ; ++i) {
713
ir = new AluInstruction(combine, v[2 * i], v[2 * i], v[2 * i + 1], write);
714
emit_instruction(ir);
717
ir->set_flag(alu_last_instr);
720
ir = new AluInstruction(combine, v[0], v[0], v[2], last_write);
721
emit_instruction(ir);
727
// Emits a float "any"/"all" comparison over 'nc' components (3 or 4 per
// do_emit).
//
// Step 1: each component pair is compared with 'op' into v[i], with the NIR
// abs/negate modifiers of both sources applied.
// Step 2: op1_max4 is issued over the channels: channels below nc read
// their own comparison result (source negated when 'all'), channels nc..3
// read Value::one_f ('all') or Value::zero.
// Step 3: 'op' is remapped to a *_dx10 set opcode and v[0] is compared with
// Value::one_f.
//
// NOTE(review): the conditions around the alu_src0_neg / alu_src1_neg flags
// on the padding and final instructions, the if/else selecting between the
// two 'op' remappings and the return statement are not visible here.
bool EmitAluInstruction::emit_any_all_fcomp(const nir_alu_instr& instr, EAluOp op, unsigned nc, bool all)
729
AluInstruction *ir = nullptr;
730
PValue v[4]; // this might need some additional temp register creation
731
for (unsigned i = 0; i < 4 ; ++i)
732
v[i] = from_nir(instr.dest, i);
734
for (unsigned i = 0; i < nc ; ++i) {
735
ir = new AluInstruction(op, v[i], m_src[0][i], m_src[1][i], write);
737
if (instr.src[0].abs)
738
ir->set_flag(alu_src0_abs);
739
if (instr.src[0].negate)
740
ir->set_flag(alu_src0_neg);
742
if (instr.src[1].abs)
743
ir->set_flag(alu_src1_abs);
744
if (instr.src[1].negate)
745
ir->set_flag(alu_src1_neg);
747
emit_instruction(ir);
750
ir->set_flag(alu_last_instr);
752
for (unsigned i = 0; i < nc ; ++i) {
753
ir = new AluInstruction(op1_max4, v[i], v[i], write);
754
if (all) ir->set_flag(alu_src0_neg);
755
emit_instruction(ir);
758
// Fill the channels that have no comparison result.
for (unsigned i = nc; i < 4 ; ++i) {
759
ir = new AluInstruction(op1_max4, v[i],
760
all ? Value::one_f : Value::zero, write);
762
ir->set_flag(alu_src0_neg);
764
emit_instruction(ir);
767
ir->set_flag(alu_last_instr);
770
op = (op == op2_sete) ? op2_sete_dx10: op2_setne_dx10;
772
op = (op == op2_sete) ? op2_setne_dx10: op2_sete_dx10;
774
ir = new AluInstruction(op, v[0], v[0], Value::one_f, last_write);
776
ir->set_flag(alu_src1_neg);
777
emit_instruction(ir);
782
// Emits a two-component float "any"/"all" comparison.
//
// Components 0 and 1 are compared with 'op' (with the NIR abs/negate
// modifiers of both sources) into v[0] and v[1]; the two results are then
// merged into v[0] with op2_or_int when 'op' is op2_setne_dx10 and with
// op2_and_int otherwise.
// NOTE(review): 'all' is not read in any visible line; the combine opcode
// is derived from 'op' instead. Closing braces and the return statement are
// not visible in this excerpt.
bool EmitAluInstruction::emit_any_all_fcomp2(const nir_alu_instr& instr, EAluOp op, bool all)
784
AluInstruction *ir = nullptr;
785
PValue v[4]; // this might need some additional temp register creation
786
for (unsigned i = 0; i < 4 ; ++i)
787
v[i] = from_nir(instr.dest, i);
789
for (unsigned i = 0; i < 2 ; ++i) {
790
ir = new AluInstruction(op, v[i], m_src[0][i], m_src[1][i], write);
791
if (instr.src[0].abs)
792
ir->set_flag(alu_src0_abs);
793
if (instr.src[0].negate)
794
ir->set_flag(alu_src0_neg);
796
if (instr.src[1].abs)
797
ir->set_flag(alu_src1_abs);
798
if (instr.src[1].negate)
799
ir->set_flag(alu_src1_neg);
801
emit_instruction(ir);
804
ir->set_flag(alu_last_instr);
806
op = (op == op2_setne_dx10) ? op2_or_int: op2_and_int;
807
ir = new AluInstruction(op, v[0], v[0], v[1], last_write);
808
emit_instruction(ir);
813
// Emits a two-source op that needs special slot handling (used by do_emit
// for imul, imul_high and umul_high).
//
// CAYMAN path: for every written component k the opcode is issued in all
// four slots, each reading component k of both sources; only slot i == k is
// built with 'write', and slot 3 gets alu_last_instr.
// Other path: one instruction per written component, built with
// 'last_write'.
// Both paths map the NIR negate/abs/saturate modifiers to instruction flags.
// NOTE(review): the else header, closing braces and the return statement
// are not visible in this excerpt.
bool EmitAluInstruction::emit_alu_trans_op2(const nir_alu_instr& instr, EAluOp opcode)
815
const nir_alu_src& src0 = instr.src[0];
816
const nir_alu_src& src1 = instr.src[1];
818
AluInstruction *ir = nullptr;
820
if (get_chip_class() == CAYMAN) {
821
for (int k = 0; k < 4; ++k) {
822
if (instr.dest.write_mask & (1 << k)) {
824
for (int i = 0; i < 4; i++) {
825
ir = new AluInstruction(opcode, from_nir(instr.dest, i), m_src[0][k], m_src[1][k], (i == k) ? write : empty);
826
if (src0.negate) ir->set_flag(alu_src0_neg);
827
if (src0.abs) ir->set_flag(alu_src0_abs);
828
if (src1.negate) ir->set_flag(alu_src1_neg);
829
if (src1.abs) ir->set_flag(alu_src1_abs);
830
if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
831
if (i == 3) ir->set_flag(alu_last_instr);
832
emit_instruction(ir);
837
for (int i = 0; i < 4 ; ++i) {
838
if (instr.dest.write_mask & (1 << i)){
839
ir = new AluInstruction(opcode, from_nir(instr.dest, i), m_src[0][i], m_src[1][i], last_write);
840
if (src0.negate) ir->set_flag(alu_src0_neg);
841
if (src0.abs) ir->set_flag(alu_src0_abs);
842
if (src1.negate) ir->set_flag(alu_src1_neg);
843
if (src1.abs) ir->set_flag(alu_src1_abs);
844
if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
845
emit_instruction(ir);
852
// Integer wrapper around emit_alu_op2: a negate or abs modifier on either
// source is reported on std::cerr; the visible path then forwards to
// emit_alu_op2 with the same opcode and options.
// NOTE(review): whatever follows the message inside the if (presumably an
// early return) is not visible in this excerpt.
bool EmitAluInstruction::emit_alu_op2_int(const nir_alu_instr& instr, EAluOp opcode, AluOp2Opts opts)
855
const nir_alu_src& src0 = instr.src[0];
856
const nir_alu_src& src1 = instr.src[1];
858
if (src0.negate || src1.negate ||
859
src0.abs || src1.abs) {
860
std::cerr << "R600: don't support modifiers with integer operations";
863
return emit_alu_op2(instr, opcode, opts);
866
// Emits a two-source ALU op for every destination component in write_mask.
//
// Options in 'ops':
//  - op2_opt_reverse swaps the two sources (both the modifier pointers and
//    the m_src indices idx0/idx1);
//  - op2_opt_neg_src1 toggles the negate modifier of the second source
//    (XOR with its NIR negate bit, evaluated after the swap).
// The NIR abs/negate/saturate modifiers are mapped to instruction flags,
// and alu_last_instr is set on 'ir' afterwards.
// NOTE(review): the declarations of idx0/idx1, closing braces, any guard on
// the final set_flag and the return statement are not visible.
bool EmitAluInstruction::emit_alu_op2(const nir_alu_instr& instr, EAluOp opcode, AluOp2Opts ops)
868
const nir_alu_src *src0 = &instr.src[0];
869
const nir_alu_src *src1 = &instr.src[1];
873
if (ops & op2_opt_reverse) {
874
std::swap(src0, src1);
875
std::swap(idx0, idx1);
878
bool src1_negate = (ops & op2_opt_neg_src1) ^ src1->negate;
880
AluInstruction *ir = nullptr;
881
for (int i = 0; i < 4 ; ++i) {
882
if (instr.dest.write_mask & (1 << i)){
883
ir = new AluInstruction(opcode, from_nir(instr.dest, i),
884
m_src[idx0][i], m_src[idx1][i], write);
886
if (src0->negate) ir->set_flag(alu_src0_neg);
887
if (src0->abs) ir->set_flag(alu_src0_abs);
888
if (src1_negate) ir->set_flag(alu_src1_neg);
889
if (src1->abs) ir->set_flag(alu_src1_abs);
890
if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
891
emit_instruction(ir);
895
ir->set_flag(alu_last_instr);
899
// Emits a three-source ALU op for every destination component in
// write_mask.
//
// 'reorder' maps instruction operand slots to NIR source indices: operand k
// reads NIR source reorder[k] (e.g. {0, 2, 1} swaps the last two sources,
// as do_emit does for the csel variants). Negate modifiers of the reordered
// sources and the saturate modifier are mapped to flags; no abs flag is set
// in the visible lines. alu_write is set explicitly on each instruction.
// NOTE(review): the end of the constructor call, closing braces, any
// alu_last_instr handling and the return statement are not visible.
bool EmitAluInstruction::emit_alu_op3(const nir_alu_instr& instr, EAluOp opcode,
900
std::array<uint8_t, 3> reorder)
902
const nir_alu_src *src[3];
903
src[0] = &instr.src[reorder[0]];
904
src[1] = &instr.src[reorder[1]];
905
src[2] = &instr.src[reorder[2]];
907
AluInstruction *ir = nullptr;
908
for (int i = 0; i < 4 ; ++i) {
909
if (instr.dest.write_mask & (1 << i)){
910
ir = new AluInstruction(opcode, from_nir(instr.dest, i),
911
m_src[reorder[0]][i],
912
m_src[reorder[1]][i],
913
m_src[reorder[2]][i],
916
if (src[0]->negate) ir->set_flag(alu_src0_neg);
917
if (src[1]->negate) ir->set_flag(alu_src1_neg);
918
if (src[2]->negate) ir->set_flag(alu_src2_neg);
920
if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
921
ir->set_flag(alu_write);
922
emit_instruction(ir);
929
// Emits integer negation: for each written component an op2_sub_int with
// Value::zero as first operand is issued, and alu_last_instr is set on 'ir'
// afterwards.
// NOTE(review): the second operand line (presumably the source component,
// giving 0 - src), closing braces, any guard on the final set_flag and the
// return statement are not visible in this excerpt.
bool EmitAluInstruction::emit_alu_ineg(const nir_alu_instr& instr)
931
AluInstruction *ir = nullptr;
932
for (int i = 0; i < 4 ; ++i) {
933
if (instr.dest.write_mask & (1 << i)){
934
ir = new AluInstruction(op2_sub_int, from_nir(instr.dest, i), Value::zero,
936
emit_instruction(ir);
940
ir->set_flag(alu_last_instr);
945
// File-local swizzle character table. NOTE(review): it is not referenced in
// any visible line of this file; presumably indices 0-3 name the x/y/z/w
// channels and the remaining entries special selectors. Confirm before
// relying on it.
static const char swz[] = "xyzw01?_";
947
// Copies the first 'ncomp' values of 'v' into 'out' with op1_mov so that
// source modifiers are applied by the move (emit_tex_fdd uses this before
// issuing a texture instruction).
// NOTE(review): the conditions guarding the alu_src0_abs and alu_src0_neg
// flags (presumably src.abs and src.negate) and any alu_last_instr handling
// are not visible in this excerpt.
void EmitAluInstruction::split_alu_modifiers(const nir_alu_src& src,
948
const GPRVector::Values& v, GPRVector::Values& out, int ncomp)
951
AluInstruction *alu = nullptr;
952
for (int i = 0; i < ncomp; ++i) {
953
alu = new AluInstruction(op1_mov, out[i], v[i], {alu_write});
955
alu->set_flag(alu_src0_abs);
957
alu->set_flag(alu_src0_neg);
958
emit_instruction(alu);
963
// Emits a screen-space derivative (fddx/fddy family) as a texture
// instruction.
//
// The source vector is fetched with the NIR swizzle of source 0 for the
// first 'ncomp' components (unused swizzle entries stay 7). If source 0
// carries an abs or negate modifier, the values are first routed through a
// temp vec4 via split_alu_modifiers. The destination swizzle keeps channel
// i when it is in write_mask and uses 7 otherwise. A TexInstruction with
// opcode 'op' is then created and emitted; TexInstruction::grad_fine is set
// on it.
//
// NOTE(review): the rest of the signature (per do_emit a third, boolean
// argument selects the fine variant), the declarations of 'v' and 'dst',
// the use of 'tmp' after split_alu_modifiers, the condition guarding
// grad_fine and the return statement are not visible in this excerpt. The
// block comment below is also missing its closing line here.
bool EmitAluInstruction::emit_tex_fdd(const nir_alu_instr& instr, TexInstruction::Opcode op,
968
std::array<int, 4> writemask = {0,1,2,3};
970
int ncomp = nir_dest_num_components(instr.dest.dest);
971
GPRVector::Swizzle src_swz = {7,7,7,7};
972
for (auto i = 0; i < ncomp; ++i)
973
src_swz[i] = instr.src[0].swizzle[i];
975
auto src = vec_from_nir_with_fetch_constant(instr.src[0].src, (1 << ncomp) - 1, src_swz);
977
if (instr.src[0].abs || instr.src[0].negate) {
978
GPRVector tmp = get_temp_vec4();
979
split_alu_modifiers(instr.src[0], src.values(), tmp.values(), ncomp);
983
for (int i = 0; i < 4; ++i) {
984
writemask[i] = (instr.dest.write_mask & (1 << i)) ? i : 7;
985
v[i] = from_nir(instr.dest, (i < ncomp) ? i : 0);
988
/* This is querying the derivatives of the output fb, so we would either need
989
* access to the neighboring pixels or to the framebuffer. Neither is currently
993
auto tex = new TexInstruction(op, dst, src, 0, R600_MAX_CONST_BUFFERS, PValue());
994
tex->set_dest_swizzle(writemask);
997
tex->set_flag(TexInstruction::grad_fine);
999
emit_instruction(tex);
1004
// Emits nir_op_unpack_half_2x16_split_y: component 0 of source 0 is shifted
// right by 16 (op2_lshr_int) into a temp register, which is then converted
// with op1_flt16_to_flt32 into destination component 0. Each of the two
// instructions is flagged alu_last_instr.
// NOTE(review): the return statement is not visible in this excerpt.
bool EmitAluInstruction::emit_unpack_32_2x16_split_y(const nir_alu_instr& instr)
1006
auto tmp = get_temp_register();
1007
emit_instruction(op2_lshr_int, tmp,
1008
{m_src[0][0], PValue(new LiteralValue(16))},
1009
{alu_write, alu_last_instr});
1011
emit_instruction(op1_flt16_to_flt32, from_nir(instr.dest, 0),
1012
{tmp}, {alu_write, alu_last_instr});
1017
// Emits nir_op_unpack_half_2x16_split_x: component 0 of source 0 is
// converted with op1_flt16_to_flt32 into destination component 0; no shift
// is applied, unlike in the _y variant.
// NOTE(review): the return statement is not visible in this excerpt.
bool EmitAluInstruction::emit_unpack_32_2x16_split_x(const nir_alu_instr& instr)
1019
emit_instruction(op1_flt16_to_flt32, from_nir(instr.dest, 0),
1020
{m_src[0][0]},{alu_write, alu_last_instr});
1024
// Emits nir_op_pack_half_2x16_split: component 0 of sources 0 and 1 is
// converted with op1_flt32_to_flt16 into temps x and y, y is shifted left
// by 16, and x | y is written to destination component 0.
// The two conversions share one group (only the second carries
// alu_last_instr); the shift and the OR are each flagged alu_last_instr.
// NOTE(review): the return statement is not visible in this excerpt.
bool EmitAluInstruction::emit_pack_32_2x16_split(const nir_alu_instr& instr)
1026
PValue x = get_temp_register();
1027
PValue y = get_temp_register();
1029
emit_instruction(op1_flt32_to_flt16, x,{m_src[0][0]},{alu_write});
1030
emit_instruction(op1_flt32_to_flt16, y,{m_src[1][0]},{alu_write, alu_last_instr});
1032
emit_instruction(op2_lshl_int, y, {y, PValue(new LiteralValue(16))},{alu_write, alu_last_instr});
1034
emit_instruction(op2_or_int, {from_nir(instr.dest, 0)} , {x, y},{alu_write, alu_last_instr});