/*
 * Copyright (c) 2018 Collabora LTD
 *
 * Author: Gert Wollny <gert.wollny@collabora.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
#include "sfn_shader_geometry.h"

#include "sfn_instruction_fetch.h"
#include "sfn_instruction_misc.h"
#include "sfn_shaderio.h"

#include <array>
#include <cassert>
34
GeometryShaderFromNir::GeometryShaderFromNir(r600_pipe_shader *sh,
35
r600_pipe_shader_selector &sel,
36
const r600_shader_key &key,
37
enum chip_class chip_class):
38
VertexStage(PIPE_SHADER_GEOMETRY, sel, sh->shader,
39
sh->scratch_space_needed, chip_class, key.gs.first_atomic_counter),
42
m_first_vertex_emitted(false),
44
m_next_input_ring_offset(0),
48
m_gs_tri_strip_adj_fix(false),
51
sh_info().atomic_base = key.gs.first_atomic_counter;
54
bool GeometryShaderFromNir::emit_store(nir_intrinsic_instr* instr)
56
auto location = nir_intrinsic_io_semantics(instr).location;
57
auto index = nir_src_as_const_value(instr->src[1]);
59
auto driver_location = nir_intrinsic_base(instr) + index->u32;
61
uint32_t write_mask = nir_intrinsic_write_mask(instr);
62
GPRVector::Swizzle swz = swizzle_from_mask(write_mask);
64
auto out_value = vec_from_nir_with_fetch_constant(instr->src[0], write_mask, swz, true);
66
sh_info().output[driver_location].write_mask = write_mask;
68
auto ir = new MemRingOutIntruction(cf_mem_ring, mem_write_ind, out_value,
70
instr->num_components, m_export_base[0]);
71
streamout_data[location] = ir;
76
bool GeometryShaderFromNir::scan_sysvalue_access(UNUSED nir_instr *instr)
78
if (instr->type != nir_instr_type_intrinsic)
81
nir_intrinsic_instr *ii = nir_instr_as_intrinsic(instr);
83
switch (ii->intrinsic) {
84
case nir_intrinsic_store_output:
85
return process_store_output(ii);
86
case nir_intrinsic_load_input:
87
case nir_intrinsic_load_per_vertex_input:
88
return process_load_input(ii);
94
bool GeometryShaderFromNir::process_store_output(nir_intrinsic_instr* instr)
96
auto location = nir_intrinsic_io_semantics(instr).location;
97
auto index = nir_src_as_const_value(instr->src[1]);
100
auto driver_location = nir_intrinsic_base(instr) + index->u32;
102
if (location == VARYING_SLOT_COL0 ||
103
location == VARYING_SLOT_COL1 ||
104
(location >= VARYING_SLOT_VAR0 &&
105
location <= VARYING_SLOT_VAR31) ||
106
(location >= VARYING_SLOT_TEX0 &&
107
location <= VARYING_SLOT_TEX7) ||
108
location == VARYING_SLOT_BFC0 ||
109
location == VARYING_SLOT_BFC1 ||
110
location == VARYING_SLOT_PNTC ||
111
location == VARYING_SLOT_CLIP_VERTEX ||
112
location == VARYING_SLOT_CLIP_DIST0 ||
113
location == VARYING_SLOT_CLIP_DIST1 ||
114
location == VARYING_SLOT_PRIMITIVE_ID ||
115
location == VARYING_SLOT_POS ||
116
location == VARYING_SLOT_PSIZ ||
117
location == VARYING_SLOT_LAYER ||
118
location == VARYING_SLOT_VIEWPORT ||
119
location == VARYING_SLOT_FOGC) {
120
r600_shader_io& io = sh_info().output[driver_location];
122
auto semantic = r600_get_varying_semantic(location);
123
io.name = semantic.first;
124
io.sid = semantic.second;
126
evaluate_spi_sid(io);
128
if (sh_info().noutput <= driver_location)
129
sh_info().noutput = driver_location + 1;
131
if (location == VARYING_SLOT_CLIP_DIST0 ||
132
location == VARYING_SLOT_CLIP_DIST1) {
133
m_clip_dist_mask |= 1 << (location - VARYING_SLOT_CLIP_DIST0);
136
if (location == VARYING_SLOT_VIEWPORT) {
137
sh_info().vs_out_viewport = 1;
138
sh_info().vs_out_misc_write = 1;
145
bool GeometryShaderFromNir::process_load_input(nir_intrinsic_instr* instr)
147
auto location = nir_intrinsic_io_semantics(instr).location;
148
auto index = nir_src_as_const_value(instr->src[1]);
151
auto driver_location = nir_intrinsic_base(instr) + index->u32;
153
if (location == VARYING_SLOT_POS ||
154
location == VARYING_SLOT_PSIZ ||
155
location == VARYING_SLOT_FOGC ||
156
location == VARYING_SLOT_CLIP_VERTEX ||
157
location == VARYING_SLOT_CLIP_DIST0 ||
158
location == VARYING_SLOT_CLIP_DIST1 ||
159
location == VARYING_SLOT_COL0 ||
160
location == VARYING_SLOT_COL1 ||
161
location == VARYING_SLOT_BFC0 ||
162
location == VARYING_SLOT_BFC1 ||
163
location == VARYING_SLOT_PNTC ||
164
(location >= VARYING_SLOT_VAR0 &&
165
location <= VARYING_SLOT_VAR31) ||
166
(location >= VARYING_SLOT_TEX0 &&
167
location <= VARYING_SLOT_TEX7)) {
169
uint64_t bit = 1ull << location;
170
if (!(bit & m_input_mask)) {
171
r600_shader_io& io = sh_info().input[driver_location];
172
auto semantic = r600_get_varying_semantic(location);
173
io.name = semantic.first;
174
io.sid = semantic.second;
176
io.ring_offset = 16 * driver_location;
178
m_next_input_ring_offset += 16;
186
bool GeometryShaderFromNir::do_allocate_reserved_registers()
188
const int sel[6] = {0, 0 ,0, 1, 1, 1};
189
const int chan[6] = {0, 1 ,3, 0, 1, 2};
191
increment_reserved_registers();
192
increment_reserved_registers();
194
/* Reserve registers used by the shaders (should check how many
195
* components are actually used */
196
for (int i = 0; i < 6; ++i) {
197
auto reg = new GPRValue(sel[i], chan[i]);
199
m_per_vertex_offsets[i].reset(reg);
200
inject_register(sel[i], chan[i], m_per_vertex_offsets[i], false);
202
auto reg = new GPRValue(0, 2);
204
m_primitive_id.reset(reg);
205
inject_register(0, 2, m_primitive_id, false);
207
reg = new GPRValue(1, 3);
209
m_invocation_id.reset(reg);
210
inject_register(1, 3, m_invocation_id, false);
212
m_export_base[0] = get_temp_register(0);
213
m_export_base[1] = get_temp_register(0);
214
m_export_base[2] = get_temp_register(0);
215
m_export_base[3] = get_temp_register(0);
216
emit_instruction(new AluInstruction(op1_mov, m_export_base[0], Value::zero, {alu_write, alu_last_instr}));
217
emit_instruction(new AluInstruction(op1_mov, m_export_base[1], Value::zero, {alu_write, alu_last_instr}));
218
emit_instruction(new AluInstruction(op1_mov, m_export_base[2], Value::zero, {alu_write, alu_last_instr}));
219
emit_instruction(new AluInstruction(op1_mov, m_export_base[3], Value::zero, {alu_write, alu_last_instr}));
221
sh_info().ring_item_sizes[0] = m_next_input_ring_offset;
223
if (m_key.gs.tri_strip_adj_fix)
229
void GeometryShaderFromNir::emit_adj_fix()
231
PValue adjhelp0(new GPRValue(m_export_base[0]->sel(), 1));
232
emit_instruction(op2_and_int, adjhelp0, {m_primitive_id, Value::one_i}, {alu_write, alu_last_instr});
235
int reg_chanels[6] = {1, 2, 3, 1, 2, 3};
237
int rotate_indices[6] = {4, 5, 0, 1, 2, 3};
239
reg_indices[0] = reg_indices[1] = reg_indices[2] = m_export_base[1]->sel();
240
reg_indices[3] = reg_indices[4] = reg_indices[5] = m_export_base[2]->sel();
242
std::array<PValue, 6> adjhelp;
244
AluInstruction *ir = nullptr;
245
for (int i = 0; i < 6; i++) {
246
adjhelp[i].reset(new GPRValue(reg_indices[i], reg_chanels[i]));
247
ir = new AluInstruction(op3_cnde_int, adjhelp[i],
248
{adjhelp0, m_per_vertex_offsets[i],
249
m_per_vertex_offsets[rotate_indices[i]]},
251
if ((get_chip_class() == CAYMAN && i == 2) || (i == 3))
252
ir->set_flag(alu_last_instr);
253
emit_instruction(ir);
255
ir->set_flag(alu_last_instr);
257
for (int i = 0; i < 6; i++)
258
m_per_vertex_offsets[i] = adjhelp[i];
262
bool GeometryShaderFromNir::emit_intrinsic_instruction_override(nir_intrinsic_instr* instr)
264
switch (instr->intrinsic) {
265
case nir_intrinsic_emit_vertex:
266
return emit_vertex(instr, false);
267
case nir_intrinsic_end_primitive:
268
return emit_vertex(instr, true);
269
case nir_intrinsic_load_primitive_id:
270
return load_preloaded_value(instr->dest, 0, m_primitive_id);
271
case nir_intrinsic_load_invocation_id:
272
return load_preloaded_value(instr->dest, 0, m_invocation_id);
273
case nir_intrinsic_store_output:
274
return emit_store(instr);
275
case nir_intrinsic_load_per_vertex_input:
276
return emit_load_per_vertex_input(instr);
283
bool GeometryShaderFromNir::emit_vertex(nir_intrinsic_instr* instr, bool cut)
285
int stream = nir_intrinsic_stream_id(instr);
288
for(auto v: streamout_data) {
289
if (stream == 0 || v.first != VARYING_SLOT_POS) {
290
v.second->patch_ring(stream, m_export_base[stream]);
291
emit_instruction(v.second);
295
streamout_data.clear();
296
emit_instruction(new EmitVertex(stream, cut));
299
emit_instruction(new AluInstruction(op2_add_int, m_export_base[stream], m_export_base[stream],
300
PValue(new LiteralValue(sh_info().noutput)),
301
{alu_write, alu_last_instr}));
306
bool GeometryShaderFromNir::emit_load_per_vertex_input(nir_intrinsic_instr* instr)
308
auto dest = vec_from_nir(instr->dest, 4);
310
std::array<int, 4> swz = {7,7,7,7};
311
for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) {
312
swz[i] = i + nir_intrinsic_component(instr);
315
auto literal_index = nir_src_as_const_value(instr->src[0]);
317
if (!literal_index) {
318
sfn_log << SfnLog::err << "GS: Indirect input addressing not (yet) supported\n";
321
assert(literal_index->u32 < 6);
322
assert(nir_intrinsic_io_semantics(instr).num_slots == 1);
324
PValue addr = m_per_vertex_offsets[literal_index->u32];
325
auto fetch = new FetchInstruction(vc_fetch, no_index_offset, dest, addr,
326
16 * nir_intrinsic_base(instr),
327
R600_GS_RING_CONST_BUFFER, PValue(), bim_none, true);
328
fetch->set_dest_swizzle(swz);
330
emit_instruction(fetch);
334
void GeometryShaderFromNir::do_finalize()
336
if (m_clip_dist_mask) {
337
int num_clip_dist = 4 * util_bitcount(m_clip_dist_mask);
338
sh_info().cc_dist_mask = (1 << num_clip_dist) - 1;
339
sh_info().clip_dist_write = (1 << num_clip_dist) - 1;