2
// Copyright 2019 Karol Herbst
4
// Permission is hereby granted, free of charge, to any person obtaining a
5
// copy of this software and associated documentation files (the "Software"),
6
// to deal in the Software without restriction, including without limitation
7
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
// and/or sell copies of the Software, and to permit persons to whom the
9
// Software is furnished to do so, subject to the following conditions:
11
// The above copyright notice and this permission notice shall be included in
12
// all copies or substantial portions of the Software.
14
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17
// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20
// OTHER DEALINGS IN THE SOFTWARE.
23
#include "invocation.hpp"
27
#include "core/device.hpp"
28
#include "core/error.hpp"
29
#include "core/binary.hpp"
30
#include "pipe/p_state.h"
31
#include "util/algorithm.hpp"
32
#include "util/functional.hpp"
34
#include <compiler/glsl_types.h>
35
#include <compiler/nir/nir_builder.h>
36
#include <compiler/nir/nir_serialize.h>
37
#include <compiler/spirv/nir_spirv.h>
38
#include <util/u_math.h>
40
using namespace clover;
42
#ifdef HAVE_CLOVER_SPIRV
44
// Refs and unrefs the glsl_type_singleton.
45
static class glsl_type_ref {
48
glsl_type_singleton_init_or_ref();
52
glsl_type_singleton_decref();
56
static const nir_shader_compiler_options *
57
dev_get_nir_compiler_options(const device &dev)
59
const void *co = dev.get_compiler_options(PIPE_SHADER_IR_NIR);
60
return static_cast<const nir_shader_compiler_options*>(co);
63
static void debug_function(void *private_data,
64
enum nir_spirv_debug_level level, size_t spirv_offset,
68
auto r_log = reinterpret_cast<std::string *>(private_data);
73
clover_arg_size_align(const glsl_type *type, unsigned *size, unsigned *align)
75
if (type == glsl_type::sampler_type || type->is_image()) {
79
*size = type->cl_size();
80
*align = type->cl_alignment();
85
clover_nir_add_image_uniforms(nir_shader *shader)
87
/* Clover expects each image variable to take up a cl_mem worth of space in
88
* the arguments data. Add uniforms as needed to match this expectation.
90
nir_foreach_image_variable_safe(var, shader) {
91
nir_variable *uniform = rzalloc(shader, nir_variable);
92
uniform->name = ralloc_strdup(uniform, var->name);
93
uniform->type = glsl_uintN_t_type(sizeof(cl_mem) * 8);
94
uniform->data.mode = nir_var_uniform;
95
uniform->data.read_only = true;
96
uniform->data.location = var->data.location;
98
exec_node_insert_node_before(&var->node, &uniform->node);
103
clover_nir_lower_images(nir_shader *shader)
105
nir_function_impl *impl = nir_shader_get_entrypoint(shader);
107
ASSERTED int last_loc = -1;
108
int num_rd_images = 0, num_wr_images = 0;
109
nir_foreach_image_variable(var, shader) {
110
/* Assume they come in order */
111
assert(var->data.location > last_loc);
112
last_loc = var->data.location;
114
if (var->data.access & ACCESS_NON_WRITEABLE)
115
var->data.driver_location = num_rd_images++;
117
var->data.driver_location = num_wr_images++;
119
shader->info.num_textures = num_rd_images;
120
BITSET_ZERO(shader->info.textures_used);
122
BITSET_SET_RANGE_INSIDE_WORD(shader->info.textures_used, 0, num_rd_images - 1);
123
shader->info.num_images = num_wr_images;
126
int num_samplers = 0;
127
nir_foreach_uniform_variable(var, shader) {
128
if (var->type == glsl_bare_sampler_type()) {
129
/* Assume they come in order */
130
assert(var->data.location > last_loc);
131
last_loc = var->data.location;
133
/* TODO: Constant samplers */
134
var->data.driver_location = num_samplers++;
136
/* CL shouldn't have any sampled images */
137
assert(!glsl_type_is_sampler(var->type));
142
nir_builder_init(&b, impl);
144
bool progress = false;
145
nir_foreach_block_reverse(block, impl) {
146
nir_foreach_instr_reverse_safe(instr, block) {
147
switch (instr->type) {
148
case nir_instr_type_deref: {
149
nir_deref_instr *deref = nir_instr_as_deref(instr);
150
if (deref->deref_type != nir_deref_type_var)
153
if (!glsl_type_is_image(deref->type) &&
154
!glsl_type_is_sampler(deref->type))
157
b.cursor = nir_instr_remove(&deref->instr);
159
nir_imm_intN_t(&b, deref->var->data.driver_location,
160
deref->dest.ssa.bit_size);
161
nir_ssa_def_rewrite_uses(&deref->dest.ssa, loc);
166
case nir_instr_type_tex: {
167
nir_tex_instr *tex = nir_instr_as_tex(instr);
169
for (unsigned i = 0; i < tex->num_srcs; i++) {
170
if (tex->src[i].src_type == nir_tex_src_texture_deref ||
171
tex->src[i].src_type == nir_tex_src_sampler_deref) {
172
nir_deref_instr *deref = nir_src_as_deref(tex->src[i].src);
173
if (deref->deref_type == nir_deref_type_var) {
174
/* In this case, we know the actual variable */
175
if (tex->src[i].src_type == nir_tex_src_texture_deref)
176
tex->texture_index = deref->var->data.driver_location;
178
tex->sampler_index = deref->var->data.driver_location;
179
/* This source gets discarded */
180
nir_instr_rewrite_src(&tex->instr, &tex->src[i].src,
184
assert(tex->src[i].src.is_ssa);
185
b.cursor = nir_before_instr(&tex->instr);
186
/* Back-ends expect a 32-bit thing, not 64-bit */
187
nir_ssa_def *offset = nir_u2u32(&b, tex->src[i].src.ssa);
188
if (tex->src[i].src_type == nir_tex_src_texture_deref)
189
tex->src[count].src_type = nir_tex_src_texture_offset;
191
tex->src[count].src_type = nir_tex_src_sampler_offset;
192
nir_instr_rewrite_src(&tex->instr, &tex->src[count].src,
193
nir_src_for_ssa(offset));
196
/* If we've removed a source, move this one down */
199
tex->src[count].src_type = tex->src[i].src_type;
200
nir_instr_move_src(&tex->instr, &tex->src[count].src,
206
tex->num_srcs = count;
211
case nir_instr_type_intrinsic: {
212
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
213
switch (intrin->intrinsic) {
214
case nir_intrinsic_image_deref_load:
215
case nir_intrinsic_image_deref_store:
216
case nir_intrinsic_image_deref_atomic_add:
217
case nir_intrinsic_image_deref_atomic_imin:
218
case nir_intrinsic_image_deref_atomic_umin:
219
case nir_intrinsic_image_deref_atomic_imax:
220
case nir_intrinsic_image_deref_atomic_umax:
221
case nir_intrinsic_image_deref_atomic_and:
222
case nir_intrinsic_image_deref_atomic_or:
223
case nir_intrinsic_image_deref_atomic_xor:
224
case nir_intrinsic_image_deref_atomic_exchange:
225
case nir_intrinsic_image_deref_atomic_comp_swap:
226
case nir_intrinsic_image_deref_atomic_fadd:
227
case nir_intrinsic_image_deref_atomic_inc_wrap:
228
case nir_intrinsic_image_deref_atomic_dec_wrap:
229
case nir_intrinsic_image_deref_size:
230
case nir_intrinsic_image_deref_samples: {
231
assert(intrin->src[0].is_ssa);
232
b.cursor = nir_before_instr(&intrin->instr);
233
/* Back-ends expect a 32-bit thing, not 64-bit */
234
nir_ssa_def *offset = nir_u2u32(&b, intrin->src[0].ssa);
235
nir_rewrite_image_intrinsic(intrin, offset, false);
253
nir_metadata_preserve(impl, nir_metadata_block_index |
254
nir_metadata_dominance);
256
nir_metadata_preserve(impl, nir_metadata_all);
262
struct clover_lower_nir_state {
263
std::vector<binary::argument> &args;
264
uint32_t global_dims;
265
nir_variable *constant_var;
266
nir_variable *printf_buffer;
267
nir_variable *offset_vars[3];
271
clover_lower_nir_filter(const nir_instr *instr, const void *)
273
return instr->type == nir_instr_type_intrinsic;
277
clover_lower_nir_instr(nir_builder *b, nir_instr *instr, void *_state)
279
clover_lower_nir_state *state = reinterpret_cast<clover_lower_nir_state*>(_state);
280
nir_intrinsic_instr *intrinsic = nir_instr_as_intrinsic(instr);
282
switch (intrinsic->intrinsic) {
283
case nir_intrinsic_load_printf_buffer_address: {
284
if (!state->printf_buffer) {
285
unsigned location = state->args.size();
286
state->args.emplace_back(binary::argument::global, sizeof(size_t),
287
8, 8, binary::argument::zero_ext,
288
binary::argument::printf_buffer);
290
const glsl_type *type = glsl_uint64_t_type();
291
state->printf_buffer = nir_variable_create(b->shader, nir_var_uniform,
292
type, "global_printf_buffer");
293
state->printf_buffer->data.location = location;
295
return nir_load_var(b, state->printf_buffer);
297
case nir_intrinsic_load_base_global_invocation_id: {
298
nir_ssa_def *loads[3];
300
/* create variables if we didn't do so alrady */
301
if (!state->offset_vars[0]) {
302
/* TODO: fix for 64 bit */
303
/* Even though we only place one scalar argument, clover will bind up to
304
* three 32 bit values
306
unsigned location = state->args.size();
307
state->args.emplace_back(binary::argument::scalar, 4, 4, 4,
308
binary::argument::zero_ext,
309
binary::argument::grid_offset);
311
const glsl_type *type = glsl_uint_type();
312
for (uint32_t i = 0; i < 3; i++) {
313
state->offset_vars[i] =
314
nir_variable_create(b->shader, nir_var_uniform, type,
315
"global_invocation_id_offsets");
316
state->offset_vars[i]->data.location = location + i;
320
for (int i = 0; i < 3; i++) {
321
nir_variable *var = state->offset_vars[i];
322
loads[i] = var ? nir_load_var(b, var) : nir_imm_int(b, 0);
325
return nir_u2u(b, nir_vec(b, loads, state->global_dims),
326
nir_dest_bit_size(intrinsic->dest));
328
case nir_intrinsic_load_constant_base_ptr: {
329
return nir_load_var(b, state->constant_var);
338
clover_lower_nir(nir_shader *nir, std::vector<binary::argument> &args,
339
uint32_t dims, uint32_t pointer_bit_size)
341
nir_variable *constant_var = NULL;
342
if (nir->constant_data_size) {
343
const glsl_type *type = pointer_bit_size == 64 ? glsl_uint64_t_type() : glsl_uint_type();
345
constant_var = nir_variable_create(nir, nir_var_uniform, type,
346
"constant_buffer_addr");
347
constant_var->data.location = args.size();
349
args.emplace_back(binary::argument::global, sizeof(cl_mem),
350
pointer_bit_size / 8, pointer_bit_size / 8,
351
binary::argument::zero_ext,
352
binary::argument::constant_buffer);
355
clover_lower_nir_state state = { args, dims, constant_var };
356
return nir_shader_lower_instructions(nir,
357
clover_lower_nir_filter, clover_lower_nir_instr, &state);
360
static spirv_to_nir_options
361
create_spirv_options(const device &dev, std::string &r_log)
363
struct spirv_to_nir_options spirv_options = {};
364
spirv_options.environment = NIR_SPIRV_OPENCL;
365
if (dev.address_bits() == 32u) {
366
spirv_options.shared_addr_format = nir_address_format_32bit_offset;
367
spirv_options.global_addr_format = nir_address_format_32bit_global;
368
spirv_options.temp_addr_format = nir_address_format_32bit_offset;
369
spirv_options.constant_addr_format = nir_address_format_32bit_global;
371
spirv_options.shared_addr_format = nir_address_format_32bit_offset_as_64bit;
372
spirv_options.global_addr_format = nir_address_format_64bit_global;
373
spirv_options.temp_addr_format = nir_address_format_32bit_offset_as_64bit;
374
spirv_options.constant_addr_format = nir_address_format_64bit_global;
376
spirv_options.caps.address = true;
377
spirv_options.caps.float64 = true;
378
spirv_options.caps.int8 = true;
379
spirv_options.caps.int16 = true;
380
spirv_options.caps.int64 = true;
381
spirv_options.caps.kernel = true;
382
spirv_options.caps.kernel_image = dev.image_support();
383
spirv_options.caps.int64_atomics = dev.has_int64_atomics();
384
spirv_options.debug.func = &debug_function;
385
spirv_options.debug.private_data = &r_log;
386
spirv_options.caps.printf = true;
387
return spirv_options;
390
struct disk_cache *clover::nir::create_clc_disk_cache(void)
392
struct mesa_sha1 ctx;
393
unsigned char sha1[20];
394
char cache_id[20 * 2 + 1];
395
_mesa_sha1_init(&ctx);
397
if (!disk_cache_get_function_identifier((void *)clover::nir::create_clc_disk_cache, &ctx))
400
_mesa_sha1_final(&ctx, sha1);
402
disk_cache_format_hex_id(cache_id, sha1, 20 * 2);
403
return disk_cache_create("clover-clc", cache_id, 0);
406
void clover::nir::check_for_libclc(const device &dev)
408
if (!nir_can_find_libclc(dev.address_bits()))
409
throw error(CL_COMPILER_NOT_AVAILABLE);
412
nir_shader *clover::nir::load_libclc_nir(const device &dev, std::string &r_log)
414
spirv_to_nir_options spirv_options = create_spirv_options(dev, r_log);
415
auto *compiler_options = dev_get_nir_compiler_options(dev);
417
return nir_load_libclc_shader(dev.address_bits(), dev.clc_cache,
418
&spirv_options, compiler_options);
422
can_remove_var(nir_variable *var, void *data)
424
return !(var->type->is_sampler() ||
425
var->type->is_texture() ||
426
var->type->is_image());
429
binary clover::nir::spirv_to_nir(const binary &mod, const device &dev,
432
spirv_to_nir_options spirv_options = create_spirv_options(dev, r_log);
433
std::shared_ptr<nir_shader> nir = dev.clc_nir;
434
spirv_options.clc_shader = nir.get();
437
// We only insert one section.
438
assert(mod.secs.size() == 1);
439
auto §ion = mod.secs[0];
441
binary::resource_id section_id = 0;
442
for (const auto &sym : mod.syms) {
443
assert(sym.section == 0);
446
reinterpret_cast<const pipe_binary_program_header *>(section.data.data());
447
const uint32_t *data = reinterpret_cast<const uint32_t *>(binary->blob);
448
const size_t num_words = binary->num_bytes / 4;
449
const char *name = sym.name.c_str();
450
auto *compiler_options = dev_get_nir_compiler_options(dev);
452
nir_shader *nir = spirv_to_nir(data, num_words, nullptr, 0,
453
MESA_SHADER_KERNEL, name,
454
&spirv_options, compiler_options);
456
r_log += "Translation from SPIR-V to NIR for kernel \"" + sym.name +
461
nir->info.workgroup_size_variable = sym.reqd_work_group_size[0] == 0;
462
nir->info.workgroup_size[0] = sym.reqd_work_group_size[0];
463
nir->info.workgroup_size[1] = sym.reqd_work_group_size[1];
464
nir->info.workgroup_size[2] = sym.reqd_work_group_size[2];
465
nir_validate_shader(nir, "clover");
467
// Inline all functions first.
468
// according to the comment on nir_inline_functions
469
NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_function_temp);
470
NIR_PASS_V(nir, nir_lower_returns);
471
NIR_PASS_V(nir, nir_lower_libclc, spirv_options.clc_shader);
473
NIR_PASS_V(nir, nir_inline_functions);
474
NIR_PASS_V(nir, nir_copy_prop);
475
NIR_PASS_V(nir, nir_opt_deref);
477
// Pick off the single entrypoint that we want.
478
foreach_list_typed_safe(nir_function, func, node, &nir->functions) {
479
if (!func->is_entrypoint)
480
exec_node_remove(&func->node);
482
assert(exec_list_length(&nir->functions) == 1);
484
nir_validate_shader(nir, "clover after function inlining");
486
NIR_PASS_V(nir, nir_lower_variable_initializers, ~nir_var_function_temp);
488
struct nir_lower_printf_options printf_options;
489
printf_options.treat_doubles_as_floats = false;
490
printf_options.max_buffer_size = dev.max_printf_buffer_size();
492
NIR_PASS_V(nir, nir_lower_printf, &printf_options);
494
NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL);
496
// copy propagate to prepare for lower_explicit_io
497
NIR_PASS_V(nir, nir_split_var_copies);
498
NIR_PASS_V(nir, nir_opt_copy_prop_vars);
499
NIR_PASS_V(nir, nir_lower_var_copies);
500
NIR_PASS_V(nir, nir_lower_vars_to_ssa);
501
NIR_PASS_V(nir, nir_opt_dce);
502
NIR_PASS_V(nir, nir_lower_convert_alu_types, NULL);
504
if (compiler_options->lower_to_scalar) {
505
NIR_PASS_V(nir, nir_lower_alu_to_scalar,
506
compiler_options->lower_to_scalar_filter, NULL);
508
NIR_PASS_V(nir, nir_lower_system_values);
509
nir_lower_compute_system_values_options sysval_options = { 0 };
510
sysval_options.has_base_global_invocation_id = true;
511
NIR_PASS_V(nir, nir_lower_compute_system_values, &sysval_options);
513
// constant fold before lowering mem constants
514
NIR_PASS_V(nir, nir_opt_constant_folding);
516
NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_mem_constant, NULL);
517
NIR_PASS_V(nir, nir_lower_vars_to_explicit_types, nir_var_mem_constant,
518
glsl_get_cl_type_size_align);
519
if (nir->constant_data_size > 0) {
520
assert(nir->constant_data == NULL);
521
nir->constant_data = rzalloc_size(nir, nir->constant_data_size);
522
nir_gather_explicit_io_initializers(nir, nir->constant_data,
523
nir->constant_data_size,
524
nir_var_mem_constant);
526
NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_constant,
527
spirv_options.constant_addr_format);
529
auto args = sym.args;
530
NIR_PASS_V(nir, clover_lower_nir, args, dev.max_block_size().size(),
533
NIR_PASS_V(nir, clover_nir_add_image_uniforms);
534
NIR_PASS_V(nir, nir_lower_vars_to_explicit_types,
535
nir_var_uniform, clover_arg_size_align);
536
NIR_PASS_V(nir, nir_lower_vars_to_explicit_types,
537
nir_var_mem_shared | nir_var_mem_global |
538
nir_var_function_temp,
539
glsl_get_cl_type_size_align);
541
NIR_PASS_V(nir, nir_opt_deref);
542
NIR_PASS_V(nir, nir_lower_readonly_images_to_tex, false);
543
NIR_PASS_V(nir, clover_nir_lower_images);
544
NIR_PASS_V(nir, nir_lower_memcpy);
546
/* use offsets for kernel inputs (uniform) */
547
NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_uniform,
548
nir->info.cs.ptr_size == 64 ?
549
nir_address_format_32bit_offset_as_64bit :
550
nir_address_format_32bit_offset);
552
NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_constant,
553
spirv_options.constant_addr_format);
554
NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_shared,
555
spirv_options.shared_addr_format);
557
NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_function_temp,
558
spirv_options.temp_addr_format);
560
NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_global,
561
spirv_options.global_addr_format);
563
struct nir_remove_dead_variables_options remove_dead_variables_options = {
564
.can_remove_var = can_remove_var,
566
NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_all, &remove_dead_variables_options);
568
if (compiler_options->lower_int64_options)
569
NIR_PASS_V(nir, nir_lower_int64);
571
NIR_PASS_V(nir, nir_opt_dce);
573
if (nir->constant_data_size) {
574
const char *ptr = reinterpret_cast<const char *>(nir->constant_data);
575
const binary::section constants {
577
binary::section::data_constant,
578
nir->constant_data_size,
579
{ ptr, ptr + nir->constant_data_size }
581
nir->constant_data = NULL;
582
nir->constant_data_size = 0;
583
b.secs.push_back(constants);
586
void *mem_ctx = ralloc_context(NULL);
587
unsigned printf_info_count = nir->printf_info_count;
588
nir_printf_info *printf_infos = nir->printf_info;
590
ralloc_steal(mem_ctx, printf_infos);
594
nir_serialize(&blob, nir, false);
598
const pipe_binary_program_header header { uint32_t(blob.size) };
599
binary::section text { section_id, binary::section::text_executable, header.num_bytes, {} };
600
text.data.insert(text.data.end(), reinterpret_cast<const char *>(&header),
601
reinterpret_cast<const char *>(&header) + sizeof(header));
602
text.data.insert(text.data.end(), blob.data, blob.data + blob.size);
606
b.printf_strings_in_buffer = false;
607
b.printf_infos.reserve(printf_info_count);
608
for (unsigned i = 0; i < printf_info_count; i++) {
609
binary::printf_info info;
611
info.arg_sizes.reserve(printf_infos[i].num_args);
612
for (unsigned j = 0; j < printf_infos[i].num_args; j++)
613
info.arg_sizes.push_back(printf_infos[i].arg_sizes[j]);
615
info.strings.resize(printf_infos[i].string_size);
616
memcpy(info.strings.data(), printf_infos[i].strings, printf_infos[i].string_size);
617
b.printf_infos.push_back(info);
620
ralloc_free(mem_ctx);
622
b.syms.emplace_back(sym.name, sym.attributes,
623
sym.reqd_work_group_size, section_id, 0, args);
624
b.secs.push_back(text);
630
binary clover::nir::spirv_to_nir(const binary &mod, const device &dev, std::string &r_log)
632
r_log += "SPIR-V support in clover is not enabled.\n";
633
throw error(CL_LINKER_NOT_AVAILABLE);