2
2
* Copyright 2016 Advanced Micro Devices, Inc.
5
* Permission is hereby granted, free of charge, to any person obtaining a
6
* copy of this software and associated documentation files (the "Software"),
7
* to deal in the Software without restriction, including without limitation
8
* on the rights to use, copy, modify, merge, publish, distribute, sub
9
* license, and/or sell copies of the Software, and to permit persons to whom
10
* the Software is furnished to do so, subject to the following conditions:
12
* The above copyright notice and this permission notice (including the next
13
* paragraph) shall be included in all copies or substantial portions of the
16
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22
* USE OR OTHER DEALINGS IN THE SOFTWARE.
4
* SPDX-License-Identifier: MIT
25
7
#ifndef SI_SHADER_PRIVATE_H
26
8
#define SI_SHADER_PRIVATE_H
28
10
#include "ac_shader_abi.h"
11
#include "ac_llvm_build.h"
29
12
#include "si_shader.h"
31
14
struct util_debug_callback;
60
43
* [0:5] = the number of patches per threadgroup - 1, max = 63
62
45
* [6:10] = the number of output vertices per patch - 1, max = 31
64
* [11:31] = the offset of per patch attributes in the buffer in bytes.
65
* max = NUM_PATCHES*32*32*16 = 1M
47
* [11:15] = the number of input vertices per patch - 1, max = 31 (TCS only)
49
* [16:31] = the offset of per patch attributes in the buffer in bytes.
50
* 64 outputs are implied by SI_UNIQUE_SLOT_* values.
51
* max = 32(CPs) * 64(outputs) * 16(vec4) * 64(num_patches) = 2M,
52
* clamped to 32K(LDS limit) = 32K
67
54
struct ac_arg tcs_offchip_layout;
70
/* Offsets where TCS outputs and TCS patch outputs live in LDS (<= 16K):
71
* [16:31] = TCS output patch0 offset for per-patch / 4, max = 16K / 4 = 4K
73
struct ac_arg tcs_out_lds_offsets;
74
/* Layout of TCS outputs / TES inputs:
75
* [13:18] = gl_PatchVerticesIn, max = 32
76
* [19:31] = high 13 bits of the 32-bit address of tessellation ring buffers
78
struct ac_arg tcs_out_lds_layout;
81
57
struct ac_arg tes_offchip_addr;
83
59
struct ac_arg pos_fixed_pt;
84
60
struct ac_arg alpha_reference;
61
struct ac_arg color_start;
86
63
struct ac_arg block_size;
87
64
struct ac_arg cs_user_data;
114
91
struct ac_llvm_compiler *compiler;
116
93
/* Preloaded descriptors. */
117
LLVMValueRef esgs_ring;
118
LLVMValueRef gsvs_ring[4];
119
LLVMValueRef tess_offchip_ring;
120
94
LLVMValueRef instance_divisor_constbuf;
122
96
LLVMValueRef gs_ngg_emit;
152
126
ac_nir_gs_output_info *output_info);
153
127
/* Takes a shader and a writable shader-part key; returns nothing.
 * Presumably fills *key with the TCS epilog key derived from the shader --
 * TODO confirm against the definition, which is not part of this header. */
void si_get_tcs_epilog_key(struct si_shader *shader, union si_shader_part_key *key);
154
128
/* Returns a bool computed from a shader-part key; the key is const-qualified,
 * so it is only read. Presumably reports whether a pixel shader prolog is
 * required for that key -- TODO confirm against the definition. */
bool si_need_ps_prolog(const union si_shader_part_key *key);
155
/* Takes a shader and a writable shader-part key; presumably fills *key with
 * the PS prolog key -- TODO confirm against the definition.
 *
 * NOTE(review): si_get_ps_prolog_key is declared twice below with different
 * parameter lists (with and without "bool separate_prolog"). C does not allow
 * two incompatible declarations of the same function in one translation
 * unit; keep only the one that matches the definition and its callers. */
void si_get_ps_prolog_key(struct si_shader *shader, union si_shader_part_key *key,
156
bool separate_prolog);
129
void si_get_ps_prolog_key(struct si_shader *shader, union si_shader_part_key *key);
157
130
/* Takes a shader and a writable shader-part key; returns nothing.
 * Presumably fills *key with the PS epilog key derived from the shader --
 * TODO confirm against the definition. */
void si_get_ps_epilog_key(struct si_shader *shader, union si_shader_part_key *key);
159
132
/* gfx10_shader_ngg.c */
199
172
/* Takes the shader context, an existing LLVM value "ret", an argument
 * descriptor and an index, and returns an LLVM value.
 * NOTE(review): presumably inserts the pointer argument "param" into the
 * return value "ret" at position "return_index" and returns the updated
 * value -- confirm against the definition. */
LLVMValueRef si_insert_input_ptr(struct si_shader_context *ctx, LLVMValueRef ret,
200
173
struct ac_arg param, unsigned return_index);
201
174
/* Returns an LLVM value obtained from the shader context.
 * NOTE(review): the name suggests this yields the internal-bindings pointer
 * for use in a shader prolog -- confirm against the definition. */
LLVMValueRef si_prolog_get_internal_bindings(struct si_shader_context *ctx);
202
/* Operates on the shader context and returns nothing.
 * NOTE(review): presumably declares the ES->GS ring for the LLVM backend;
 * what exactly is created is not visible from this header -- confirm. */
void si_llvm_declare_esgs_ring(struct si_shader_context *ctx);
203
175
/* Takes an argument descriptor, a right-shift amount and a bit width, and
 * returns an LLVM value.
 * NOTE(review): presumably extracts the "bitwidth"-bit field that starts at
 * bit "rshift" of the packed argument "param" -- confirm against the
 * definition. */
LLVMValueRef si_unpack_param(struct si_shader_context *ctx, struct ac_arg param, unsigned rshift,
204
176
unsigned bitwidth);
205
177
void si_build_wrapper_function(struct si_shader_context *ctx, struct ac_llvm_pointer *parts,
215
187
/* Returns an LLVM value built from the shader context.
 * NOTE(review): presumably a boolean condition that is true for threads
 * acting as ES threads -- confirm against the definition. */
LLVMValueRef si_is_es_thread(struct si_shader_context *ctx);
216
188
/* Returns an LLVM value built from the shader context.
 * NOTE(review): presumably a boolean condition that is true for threads
 * acting as GS threads -- confirm against the definition. */
LLVMValueRef si_is_gs_thread(struct si_shader_context *ctx);
217
189
/* Operates on the shader context and returns nothing.
 * NOTE(review): presumably emits the code that ends an ES shader in the
 * LLVM backend -- confirm against the definition. */
void si_llvm_es_build_end(struct si_shader_context *ctx);
218
/* Operates on the shader context and returns nothing.
 * NOTE(review): presumably loads the ESGS ring descriptor into the context
 * ahead of use -- confirm against the definition. */
void si_preload_esgs_ring(struct si_shader_context *ctx);
219
/* Operates on the shader context and returns nothing.
 * NOTE(review): presumably loads the GS ring descriptors into the context
 * ahead of use -- confirm against the definition. */
void si_preload_gs_rings(struct si_shader_context *ctx);
220
190
/* Operates on the shader context and returns nothing.
 * NOTE(review): presumably emits the code that ends a GS shader in the
 * LLVM backend -- confirm against the definition. */
void si_llvm_gs_build_end(struct si_shader_context *ctx);
221
/* Operates on the shader context and returns nothing.
 * NOTE(review): presumably installs GS-specific callbacks on the context;
 * which callbacks are set is not visible from this header -- confirm. */
void si_llvm_init_gs_callbacks(struct si_shader_context *ctx);
223
192
/* si_shader_llvm_tess.c */
224
193
/* Returns an LLVM value built from the shader context.
 * NOTE(review): presumably the relative patch ID of the current
 * tessellation invocation -- confirm against the definition. */
LLVMValueRef si_get_rel_patch_id(struct si_shader_context *ctx);
225
/* Operates on the shader context and returns nothing.
 * NOTE(review): presumably loads the tessellation ring descriptors into the
 * context ahead of use -- confirm against the definition. */
void si_llvm_preload_tess_rings(struct si_shader_context *ctx);
226
194
/* Operates on the shader context and returns nothing.
 * NOTE(review): presumably emits the code that ends an LS shader in the
 * LLVM backend -- confirm against the definition. */
void si_llvm_ls_build_end(struct si_shader_context *ctx);
227
195
/* Takes the shader context, a writable shader-part key and a flag; returns
 * nothing.
 * NOTE(review): presumably builds the TCS epilog described by *key, with
 * "separate_epilog" selecting whether it is built as a standalone part --
 * confirm against the definition. */
void si_llvm_build_tcs_epilog(struct si_shader_context *ctx, union si_shader_part_key *key,
228
196
bool separate_epilog);
234
202
bool separate_prolog);
235
203
/* Takes the shader context, a writable shader-part key and a flag; returns
 * nothing.
 * NOTE(review): presumably builds the PS epilog described by *key, with
 * "separate_epilog" selecting whether it is built as a standalone part --
 * confirm against the definition. */
void si_llvm_build_ps_epilog(struct si_shader_context *ctx, union si_shader_part_key *key,
236
204
bool separate_epilog);
237
/* Takes the shader context and a shader; returns nothing.
 * NOTE(review): presumably builds the pixel shader as one monolithic
 * function rather than as separate parts -- confirm against the definition. */
void si_llvm_build_monolithic_ps(struct si_shader_context *ctx, struct si_shader *shader);
238
205
/* Operates on the shader context and returns nothing.
 * NOTE(review): presumably emits the code that ends a pixel shader in the
 * LLVM backend -- confirm against the definition. */
void si_llvm_ps_build_end(struct si_shader_context *ctx);
240
207
/* si_shader_llvm_vs.c */
241
208
/* Takes the shader context, a writable shader-part key and a flag; returns
 * nothing.
 * NOTE(review): presumably builds the VS prolog described by *key, with
 * "separate_prolog" selecting whether it is built as a standalone part --
 * confirm against the definition. */
void si_llvm_build_vs_prolog(struct si_shader_context *ctx, union si_shader_part_key *key,
242
209
bool separate_prolog);
211
/* si_shader_aco.c */
212
/* Takes a shader, its argument description, a NIR shader and a debug
 * callback, and returns a bool.
 * NOTE(review): presumably compiles "nir" for "shader" with the ACO backend
 * and returns true on success -- confirm the return convention and whether
 * "debug" may be NULL against the definition. */
bool si_aco_compile_shader(struct si_shader *shader,
213
struct si_shader_args *args,
214
struct nir_shader *nir,
215
struct util_debug_callback *debug);
216
/* Takes a shader, a writable buffer of 32-bit code words and a 64-bit
 * scratch address; returns nothing.
 * NOTE(review): presumably patches symbol references inside "code" in place,
 * using "scratch_va" as the scratch buffer's virtual address -- confirm
 * against the definition. */
void si_aco_resolve_symbols(struct si_shader *shader, uint32_t *code, uint64_t scratch_va);