/*
 * Copyright © 2021 Google
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
#include "nir/nir_builder.h"
28
#include "radv_meta.h"
29
#include "radv_private.h"
31
#include "vk_format.h"
/* Based on:
 * https://github.com/Themaister/Granite/blob/master/assets/shaders/decode/etc2.comp
 * and
 * https://github.com/Themaister/Granite/blob/master/assets/shaders/decode/eac.comp
 *
 * With some differences:
 * - Use the vk format to do all the settings.
 * - Combine the ETC2 and EAC shaders.
 * - Since we combined the above, reuse the function for the ETC2 A8 component.
 * - the EAC shader doesn't do SNORM correctly, so this has that fixed.
 */
45
flip_endian(nir_builder *b, nir_ssa_def *src, unsigned cnt)
48
for (unsigned i = 0; i < cnt; ++i) {
49
nir_ssa_def *intermediate[4];
50
nir_ssa_def *chan = cnt == 1 ? src : nir_channel(b, src, i);
51
for (unsigned j = 0; j < 4; ++j)
52
intermediate[j] = nir_ubfe_imm(b, chan, 8 * j, 8);
54
b, nir_ior(b, nir_ishl_imm(b, intermediate[0], 24), nir_ishl_imm(b, intermediate[1], 16)),
55
nir_ior(b, nir_ishl_imm(b, intermediate[2], 8), nir_ishl_imm(b, intermediate[3], 0)));
57
return cnt == 1 ? v[0] : nir_vec(b, v, cnt);
61
etc1_color_modifier_lookup(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
63
const unsigned table[8][2] = {{2, 8}, {5, 17}, {9, 29}, {13, 42},
64
{18, 60}, {24, 80}, {33, 106}, {47, 183}};
65
nir_ssa_def *upper = nir_ieq_imm(b, y, 1);
66
nir_ssa_def *result = NULL;
67
for (unsigned i = 0; i < 8; ++i) {
69
nir_bcsel(b, upper, nir_imm_int(b, table[i][1]), nir_imm_int(b, table[i][0]));
71
result = nir_bcsel(b, nir_ieq_imm(b, x, i), tmp, result);
79
etc2_distance_lookup(nir_builder *b, nir_ssa_def *x)
81
const unsigned table[8] = {3, 6, 11, 16, 23, 32, 41, 64};
82
nir_ssa_def *result = NULL;
83
for (unsigned i = 0; i < 8; ++i) {
85
result = nir_bcsel(b, nir_ieq_imm(b, x, i), nir_imm_int(b, table[i]), result);
87
result = nir_imm_int(b, table[i]);
93
etc1_alpha_modifier_lookup(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
95
const unsigned table[16] = {0xe852, 0xc962, 0xc741, 0xc531, 0xb752, 0xa862, 0xa763, 0xa742,
96
0x9751, 0x9741, 0x9731, 0x9641, 0x9632, 0x9210, 0x8753, 0x8642};
97
nir_ssa_def *result = NULL;
98
for (unsigned i = 0; i < 16; ++i) {
99
nir_ssa_def *tmp = nir_imm_int(b, table[i]);
101
result = nir_bcsel(b, nir_ieq_imm(b, x, i), tmp, result);
105
return nir_ubfe(b, result, nir_imul_imm(b, y, 4), nir_imm_int(b, 4));
109
etc_extend(nir_builder *b, nir_ssa_def *v, int bits)
112
return nir_imul_imm(b, v, 0x11);
113
return nir_ior(b, nir_ishl_imm(b, v, 8 - bits), nir_ushr_imm(b, v, bits - (8 - bits)));
117
decode_etc2_alpha(struct nir_builder *b, nir_ssa_def *alpha_payload, nir_ssa_def *linear_pixel,
118
bool eac, nir_ssa_def *is_signed)
120
alpha_payload = flip_endian(b, alpha_payload, 2);
121
nir_ssa_def *alpha_x = nir_channel(b, alpha_payload, 1);
122
nir_ssa_def *alpha_y = nir_channel(b, alpha_payload, 0);
123
nir_ssa_def *bit_offset = nir_isub_imm(b, 45, nir_imul_imm(b, linear_pixel, 3));
124
nir_ssa_def *base = nir_ubfe_imm(b, alpha_y, 24, 8);
125
nir_ssa_def *multiplier = nir_ubfe_imm(b, alpha_y, 20, 4);
126
nir_ssa_def *table = nir_ubfe_imm(b, alpha_y, 16, 4);
129
nir_ssa_def *signed_base = nir_ibfe_imm(b, alpha_y, 24, 8);
130
signed_base = nir_imul_imm(b, signed_base, 8);
131
base = nir_iadd_imm(b, nir_imul_imm(b, base, 8), 4);
132
base = nir_bcsel(b, is_signed, signed_base, base);
133
multiplier = nir_imax(b, nir_imul_imm(b, multiplier, 8), nir_imm_int(b, 1));
136
nir_ssa_def *lsb_index =
137
nir_ubfe(b, nir_bcsel(b, nir_uge(b, bit_offset, nir_imm_int(b, 32)), alpha_y, alpha_x),
138
nir_iand_imm(b, bit_offset, 31), nir_imm_int(b, 2));
139
bit_offset = nir_iadd_imm(b, bit_offset, 2);
141
nir_ubfe(b, nir_bcsel(b, nir_uge(b, bit_offset, nir_imm_int(b, 32)), alpha_y, alpha_x),
142
nir_iand_imm(b, bit_offset, 31), nir_imm_int(b, 1));
144
nir_ixor(b, etc1_alpha_modifier_lookup(b, table, lsb_index), nir_iadd_imm(b, msb, -1));
145
nir_ssa_def *a = nir_iadd(b, base, nir_imul(b, mod, multiplier));
147
nir_ssa_def *low_bound = nir_imm_int(b, 0);
148
nir_ssa_def *high_bound = nir_imm_int(b, 255);
149
nir_ssa_def *final_mult = nir_imm_float(b, 1 / 255.0);
151
low_bound = nir_bcsel(b, is_signed, nir_imm_int(b, -1023), low_bound);
152
high_bound = nir_bcsel(b, is_signed, nir_imm_int(b, 1023), nir_imm_int(b, 2047));
154
nir_bcsel(b, is_signed, nir_imm_float(b, 1 / 1023.0), nir_imm_float(b, 1 / 2047.0));
157
return nir_fmul(b, nir_i2f32(b, nir_iclamp(b, a, low_bound, high_bound)), final_mult);
161
build_shader(struct radv_device *dev)
163
const struct glsl_type *sampler_type_2d =
164
glsl_sampler_type(GLSL_SAMPLER_DIM_2D, false, true, GLSL_TYPE_FLOAT);
165
const struct glsl_type *sampler_type_3d =
166
glsl_sampler_type(GLSL_SAMPLER_DIM_3D, false, false, GLSL_TYPE_FLOAT);
167
const struct glsl_type *img_type_2d =
168
glsl_image_type(GLSL_SAMPLER_DIM_2D, true, GLSL_TYPE_FLOAT);
169
const struct glsl_type *img_type_3d =
170
glsl_image_type(GLSL_SAMPLER_DIM_3D, false, GLSL_TYPE_FLOAT);
171
nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, "meta_decode_etc");
172
b.shader->info.workgroup_size[0] = 8;
173
b.shader->info.workgroup_size[1] = 8;
175
nir_variable *input_img_2d =
176
nir_variable_create(b.shader, nir_var_uniform, sampler_type_2d, "s_tex_2d");
177
input_img_2d->data.descriptor_set = 0;
178
input_img_2d->data.binding = 0;
180
nir_variable *input_img_3d =
181
nir_variable_create(b.shader, nir_var_uniform, sampler_type_3d, "s_tex_3d");
182
input_img_2d->data.descriptor_set = 0;
183
input_img_2d->data.binding = 0;
185
nir_variable *output_img_2d =
186
nir_variable_create(b.shader, nir_var_image, img_type_2d, "out_img_2d");
187
output_img_2d->data.descriptor_set = 0;
188
output_img_2d->data.binding = 1;
190
nir_variable *output_img_3d =
191
nir_variable_create(b.shader, nir_var_image, img_type_3d, "out_img_3d");
192
output_img_3d->data.descriptor_set = 0;
193
output_img_3d->data.binding = 1;
195
nir_ssa_def *global_id = get_global_ids(&b, 3);
197
nir_ssa_def *consts = nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .range = 16);
198
nir_ssa_def *consts2 =
199
nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 0, .range = 4);
200
nir_ssa_def *offset = nir_channels(&b, consts, 7);
201
nir_ssa_def *format = nir_channel(&b, consts, 3);
202
nir_ssa_def *image_type = nir_channel(&b, consts2, 0);
203
nir_ssa_def *is_3d = nir_ieq_imm(&b, image_type, VK_IMAGE_TYPE_3D);
204
nir_ssa_def *coord = nir_iadd(&b, global_id, offset);
205
nir_ssa_def *src_coord =
206
nir_vec3(&b, nir_ushr_imm(&b, nir_channel(&b, coord, 0), 2),
207
nir_ushr_imm(&b, nir_channel(&b, coord, 1), 2), nir_channel(&b, coord, 2));
209
nir_variable *payload_var =
210
nir_variable_create(b.shader, nir_var_shader_temp, glsl_vec4_type(), "payload");
211
nir_push_if(&b, is_3d);
213
nir_ssa_def *tex_deref = &nir_build_deref_var(&b, input_img_3d)->dest.ssa;
215
nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
216
tex->sampler_dim = GLSL_SAMPLER_DIM_3D;
217
tex->op = nir_texop_txf;
218
tex->src[0].src_type = nir_tex_src_coord;
219
tex->src[0].src = nir_src_for_ssa(src_coord);
220
tex->src[1].src_type = nir_tex_src_lod;
221
tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
222
tex->src[2].src_type = nir_tex_src_texture_deref;
223
tex->src[2].src = nir_src_for_ssa(tex_deref);
224
tex->dest_type = nir_type_uint32;
225
tex->is_array = false;
226
tex->coord_components = 3;
228
nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
229
nir_builder_instr_insert(&b, &tex->instr);
230
nir_store_var(&b, payload_var, &tex->dest.ssa, 0xf);
232
nir_push_else(&b, NULL);
234
nir_ssa_def *tex_deref = &nir_build_deref_var(&b, input_img_2d)->dest.ssa;
236
nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
237
tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
238
tex->op = nir_texop_txf;
239
tex->src[0].src_type = nir_tex_src_coord;
240
tex->src[0].src = nir_src_for_ssa(src_coord);
241
tex->src[1].src_type = nir_tex_src_lod;
242
tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
243
tex->src[2].src_type = nir_tex_src_texture_deref;
244
tex->src[2].src = nir_src_for_ssa(tex_deref);
245
tex->dest_type = nir_type_uint32;
246
tex->is_array = true;
247
tex->coord_components = 3;
249
nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
250
nir_builder_instr_insert(&b, &tex->instr);
251
nir_store_var(&b, payload_var, &tex->dest.ssa, 0xf);
253
nir_pop_if(&b, NULL);
255
nir_ssa_def *pixel_coord = nir_iand_imm(&b, nir_channels(&b, coord, 3), 3);
256
nir_ssa_def *linear_pixel = nir_iadd(&b, nir_imul_imm(&b, nir_channel(&b, pixel_coord, 0), 4),
257
nir_channel(&b, pixel_coord, 1));
259
nir_ssa_def *payload = nir_load_var(&b, payload_var);
260
nir_variable *color =
261
nir_variable_create(b.shader, nir_var_shader_temp, glsl_vec4_type(), "color");
262
nir_store_var(&b, color, nir_imm_vec4(&b, 1.0, 0.0, 0.0, 1.0), 0xf);
263
nir_push_if(&b, nir_ilt(&b, format, nir_imm_int(&b, VK_FORMAT_EAC_R11_UNORM_BLOCK)));
265
nir_ssa_def *alpha_bits_8 =
266
nir_ige(&b, format, nir_imm_int(&b, VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK));
267
nir_ssa_def *alpha_bits_1 =
268
nir_iand(&b, nir_ige(&b, format, nir_imm_int(&b, VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK)),
269
nir_ilt(&b, format, nir_imm_int(&b, VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK)));
271
nir_ssa_def *color_payload =
272
nir_bcsel(&b, alpha_bits_8, nir_channels(&b, payload, 0xC), nir_channels(&b, payload, 3));
273
color_payload = flip_endian(&b, color_payload, 2);
274
nir_ssa_def *color_y = nir_channel(&b, color_payload, 0);
275
nir_ssa_def *color_x = nir_channel(&b, color_payload, 1);
276
nir_ssa_def *flip = nir_test_mask(&b, color_y, 1);
277
nir_ssa_def *subblock = nir_ushr_imm(
278
&b, nir_bcsel(&b, flip, nir_channel(&b, pixel_coord, 1), nir_channel(&b, pixel_coord, 0)),
281
nir_variable *punchthrough =
282
nir_variable_create(b.shader, nir_var_shader_temp, glsl_bool_type(), "punchthrough");
283
nir_ssa_def *punchthrough_init =
284
nir_iand(&b, alpha_bits_1, nir_inot(&b, nir_test_mask(&b, color_y, 2)));
285
nir_store_var(&b, punchthrough, punchthrough_init, 0x1);
287
nir_variable *etc1_compat =
288
nir_variable_create(b.shader, nir_var_shader_temp, glsl_bool_type(), "etc1_compat");
289
nir_store_var(&b, etc1_compat, nir_imm_bool(&b, false), 0x1);
291
nir_variable *alpha_result =
292
nir_variable_create(b.shader, nir_var_shader_temp, glsl_float_type(), "alpha_result");
293
nir_push_if(&b, alpha_bits_8);
297
decode_etc2_alpha(&b, nir_channels(&b, payload, 3), linear_pixel, false, NULL), 1);
299
nir_push_else(&b, NULL);
301
nir_store_var(&b, alpha_result, nir_imm_float(&b, 1.0), 1);
303
nir_pop_if(&b, NULL);
305
const struct glsl_type *uvec3_type = glsl_vector_type(GLSL_TYPE_UINT, 3);
306
nir_variable *rgb_result =
307
nir_variable_create(b.shader, nir_var_shader_temp, uvec3_type, "rgb_result");
308
nir_variable *base_rgb =
309
nir_variable_create(b.shader, nir_var_shader_temp, uvec3_type, "base_rgb");
310
nir_store_var(&b, rgb_result, nir_imm_ivec3(&b, 255, 0, 0), 0x7);
313
nir_iand_imm(&b, nir_ushr(&b, color_x, nir_iadd_imm(&b, linear_pixel, 15)), 2);
314
nir_ssa_def *lsb = nir_iand_imm(&b, nir_ushr(&b, color_x, linear_pixel), 1);
317
&b, nir_iand(&b, nir_inot(&b, alpha_bits_1), nir_inot(&b, nir_test_mask(&b, color_y, 2))));
319
nir_store_var(&b, etc1_compat, nir_imm_bool(&b, true), 1);
321
for (unsigned i = 0; i < 3; ++i)
325
nir_ushr(&b, color_y,
326
nir_isub_imm(&b, 28 - 8 * i, nir_imul_imm(&b, subblock, 4))),
329
nir_store_var(&b, base_rgb, nir_vec(&b, tmp, 3), 0x7);
331
nir_push_else(&b, NULL);
333
nir_ssa_def *rb = nir_ubfe_imm(&b, color_y, 27, 5);
334
nir_ssa_def *rd = nir_ibfe_imm(&b, color_y, 24, 3);
335
nir_ssa_def *gb = nir_ubfe_imm(&b, color_y, 19, 5);
336
nir_ssa_def *gd = nir_ibfe_imm(&b, color_y, 16, 3);
337
nir_ssa_def *bb = nir_ubfe_imm(&b, color_y, 11, 5);
338
nir_ssa_def *bd = nir_ibfe_imm(&b, color_y, 8, 3);
339
nir_ssa_def *r1 = nir_iadd(&b, rb, rd);
340
nir_ssa_def *g1 = nir_iadd(&b, gb, gd);
341
nir_ssa_def *b1 = nir_iadd(&b, bb, bd);
343
nir_push_if(&b, nir_ult(&b, nir_imm_int(&b, 31), r1));
345
nir_ssa_def *r0 = nir_ior(&b, nir_ubfe_imm(&b, color_y, 24, 2),
346
nir_ishl_imm(&b, nir_ubfe_imm(&b, color_y, 27, 2), 2));
347
nir_ssa_def *g0 = nir_ubfe_imm(&b, color_y, 20, 4);
348
nir_ssa_def *b0 = nir_ubfe_imm(&b, color_y, 16, 4);
349
nir_ssa_def *r2 = nir_ubfe_imm(&b, color_y, 12, 4);
350
nir_ssa_def *g2 = nir_ubfe_imm(&b, color_y, 8, 4);
351
nir_ssa_def *b2 = nir_ubfe_imm(&b, color_y, 4, 4);
352
nir_ssa_def *da = nir_ior(&b, nir_ishl_imm(&b, nir_ubfe_imm(&b, color_y, 2, 2), 1),
353
nir_iand_imm(&b, color_y, 1));
354
nir_ssa_def *dist = etc2_distance_lookup(&b, da);
355
nir_ssa_def *index = nir_ior(&b, lsb, msb);
357
nir_store_var(&b, punchthrough,
358
nir_iand(&b, nir_load_var(&b, punchthrough),
359
nir_ieq_imm(&b, nir_iadd(&b, lsb, msb), 2)),
361
nir_push_if(&b, nir_ieq_imm(&b, index, 0));
363
nir_store_var(&b, rgb_result, etc_extend(&b, nir_vec3(&b, r0, g0, b0), 4), 0x7);
365
nir_push_else(&b, NULL);
368
nir_ssa_def *tmp = nir_iadd(&b, etc_extend(&b, nir_vec3(&b, r2, g2, b2), 4),
369
nir_imul(&b, dist, nir_isub_imm(&b, 2, index)));
370
nir_store_var(&b, rgb_result, tmp, 0x7);
372
nir_pop_if(&b, NULL);
374
nir_push_else(&b, NULL);
375
nir_push_if(&b, nir_ult(&b, nir_imm_int(&b, 31), g1));
377
nir_ssa_def *r0 = nir_ubfe_imm(&b, color_y, 27, 4);
378
nir_ssa_def *g0 = nir_ior(&b, nir_ishl_imm(&b, nir_ubfe_imm(&b, color_y, 24, 3), 1),
379
nir_iand_imm(&b, nir_ushr_imm(&b, color_y, 20), 1));
380
nir_ssa_def *b0 = nir_ior(&b, nir_ubfe_imm(&b, color_y, 15, 3),
381
nir_iand_imm(&b, nir_ushr_imm(&b, color_y, 16), 8));
382
nir_ssa_def *r2 = nir_ubfe_imm(&b, color_y, 11, 4);
383
nir_ssa_def *g2 = nir_ubfe_imm(&b, color_y, 7, 4);
384
nir_ssa_def *b2 = nir_ubfe_imm(&b, color_y, 3, 4);
385
nir_ssa_def *da = nir_iand_imm(&b, color_y, 4);
386
nir_ssa_def *db = nir_iand_imm(&b, color_y, 1);
387
nir_ssa_def *d = nir_iadd(&b, da, nir_imul_imm(&b, db, 2));
389
nir_iadd(&b, nir_ishl_imm(&b, r0, 16), nir_iadd(&b, nir_ishl_imm(&b, g0, 8), b0));
391
nir_iadd(&b, nir_ishl_imm(&b, r2, 16), nir_iadd(&b, nir_ishl_imm(&b, g2, 8), b2));
392
d = nir_bcsel(&b, nir_uge(&b, d0, d2), nir_iadd_imm(&b, d, 1), d);
393
nir_ssa_def *dist = etc2_distance_lookup(&b, d);
394
nir_ssa_def *base = nir_bcsel(&b, nir_ine_imm(&b, msb, 0), nir_vec3(&b, r2, g2, b2),
395
nir_vec3(&b, r0, g0, b0));
396
base = etc_extend(&b, base, 4);
397
base = nir_iadd(&b, base,
398
nir_imul(&b, dist, nir_isub_imm(&b, 1, nir_imul_imm(&b, lsb, 2))));
399
nir_store_var(&b, rgb_result, base, 0x7);
400
nir_store_var(&b, punchthrough,
401
nir_iand(&b, nir_load_var(&b, punchthrough),
402
nir_ieq_imm(&b, nir_iadd(&b, lsb, msb), 2)),
405
nir_push_else(&b, NULL);
406
nir_push_if(&b, nir_ult(&b, nir_imm_int(&b, 31), b1));
408
nir_ssa_def *r0 = nir_ubfe_imm(&b, color_y, 25, 6);
409
nir_ssa_def *g0 = nir_ior(&b, nir_ubfe_imm(&b, color_y, 17, 6),
410
nir_iand_imm(&b, nir_ushr_imm(&b, color_y, 18), 0x40));
412
nir_ior(&b, nir_ishl_imm(&b, nir_ubfe_imm(&b, color_y, 11, 2), 3),
413
nir_ior(&b, nir_iand_imm(&b, nir_ushr_imm(&b, color_y, 11), 0x20),
414
nir_ubfe_imm(&b, color_y, 7, 3)));
415
nir_ssa_def *rh = nir_ior(&b, nir_iand_imm(&b, color_y, 1),
416
nir_ishl_imm(&b, nir_ubfe_imm(&b, color_y, 2, 5), 1));
417
nir_ssa_def *rv = nir_ubfe_imm(&b, color_x, 13, 6);
418
nir_ssa_def *gh = nir_ubfe_imm(&b, color_x, 25, 7);
419
nir_ssa_def *gv = nir_ubfe_imm(&b, color_x, 6, 7);
420
nir_ssa_def *bh = nir_ubfe_imm(&b, color_x, 19, 6);
421
nir_ssa_def *bv = nir_ubfe_imm(&b, color_x, 0, 6);
423
r0 = etc_extend(&b, r0, 6);
424
g0 = etc_extend(&b, g0, 7);
425
b0 = etc_extend(&b, b0, 6);
426
rh = etc_extend(&b, rh, 6);
427
rv = etc_extend(&b, rv, 6);
428
gh = etc_extend(&b, gh, 7);
429
gv = etc_extend(&b, gv, 7);
430
bh = etc_extend(&b, bh, 6);
431
bv = etc_extend(&b, bv, 6);
433
nir_ssa_def *rgb = nir_vec3(&b, r0, g0, b0);
434
nir_ssa_def *dx = nir_imul(&b, nir_isub(&b, nir_vec3(&b, rh, gh, bh), rgb),
435
nir_channel(&b, pixel_coord, 0));
436
nir_ssa_def *dy = nir_imul(&b, nir_isub(&b, nir_vec3(&b, rv, gv, bv), rgb),
437
nir_channel(&b, pixel_coord, 1));
438
rgb = nir_iadd(&b, rgb, nir_ishr_imm(&b, nir_iadd_imm(&b, nir_iadd(&b, dx, dy), 2), 2));
439
nir_store_var(&b, rgb_result, rgb, 0x7);
440
nir_store_var(&b, punchthrough, nir_imm_bool(&b, false), 0x1);
442
nir_push_else(&b, NULL);
444
nir_store_var(&b, etc1_compat, nir_imm_bool(&b, true), 1);
445
nir_ssa_def *subblock_b = nir_ine_imm(&b, subblock, 0);
446
nir_ssa_def *tmp[] = {
447
nir_bcsel(&b, subblock_b, r1, rb),
448
nir_bcsel(&b, subblock_b, g1, gb),
449
nir_bcsel(&b, subblock_b, b1, bb),
451
nir_store_var(&b, base_rgb, etc_extend(&b, nir_vec(&b, tmp, 3), 5), 0x7);
453
nir_pop_if(&b, NULL);
454
nir_pop_if(&b, NULL);
455
nir_pop_if(&b, NULL);
457
nir_pop_if(&b, NULL);
458
nir_push_if(&b, nir_load_var(&b, etc1_compat));
460
nir_ssa_def *etc1_table_index = nir_ubfe(
461
&b, color_y, nir_isub_imm(&b, 5, nir_imul_imm(&b, subblock, 3)), nir_imm_int(&b, 3));
462
nir_ssa_def *sgn = nir_isub_imm(&b, 1, msb);
463
sgn = nir_bcsel(&b, nir_load_var(&b, punchthrough), nir_imul(&b, sgn, lsb), sgn);
464
nir_store_var(&b, punchthrough,
465
nir_iand(&b, nir_load_var(&b, punchthrough),
466
nir_ieq_imm(&b, nir_iadd(&b, lsb, msb), 2)),
469
nir_imul(&b, etc1_color_modifier_lookup(&b, etc1_table_index, lsb), sgn);
470
nir_ssa_def *result = nir_iadd(&b, nir_load_var(&b, base_rgb), off);
471
nir_store_var(&b, rgb_result, result, 0x7);
473
nir_pop_if(&b, NULL);
474
nir_push_if(&b, nir_load_var(&b, punchthrough));
476
nir_store_var(&b, alpha_result, nir_imm_float(&b, 0), 0x1);
477
nir_store_var(&b, rgb_result, nir_imm_ivec3(&b, 0, 0, 0), 0x7);
479
nir_pop_if(&b, NULL);
481
for (unsigned i = 0; i < 3; ++i)
482
col[i] = nir_fdiv(&b, nir_i2f32(&b, nir_channel(&b, nir_load_var(&b, rgb_result), i)),
483
nir_imm_float(&b, 255.0));
484
col[3] = nir_load_var(&b, alpha_result);
485
nir_store_var(&b, color, nir_vec(&b, col, 4), 0xf);
487
nir_push_else(&b, NULL);
489
nir_ssa_def *is_signed = nir_ior(&b, nir_ieq_imm(&b, format, VK_FORMAT_EAC_R11_SNORM_BLOCK),
490
nir_ieq_imm(&b, format, VK_FORMAT_EAC_R11G11_SNORM_BLOCK));
492
for (int i = 0; i < 2; ++i) {
493
val[i] = decode_etc2_alpha(&b, nir_channels(&b, payload, 3 << (2 * i)), linear_pixel, true,
496
val[2] = nir_imm_float(&b, 0.0);
497
val[3] = nir_imm_float(&b, 1.0);
498
nir_store_var(&b, color, nir_vec(&b, val, 4), 0xf);
500
nir_pop_if(&b, NULL);
502
nir_ssa_def *outval = nir_load_var(&b, color);
503
nir_ssa_def *img_coord = nir_vec4(&b, nir_channel(&b, coord, 0), nir_channel(&b, coord, 1),
504
nir_channel(&b, coord, 2), nir_ssa_undef(&b, 1, 32));
506
nir_push_if(&b, is_3d);
508
nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img_3d)->dest.ssa, img_coord,
509
nir_ssa_undef(&b, 1, 32), outval, nir_imm_int(&b, 0),
510
.image_dim = GLSL_SAMPLER_DIM_3D);
512
nir_push_else(&b, NULL);
514
nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img_2d)->dest.ssa, img_coord,
515
nir_ssa_undef(&b, 1, 32), outval, nir_imm_int(&b, 0),
516
.image_dim = GLSL_SAMPLER_DIM_2D, .image_array = true);
518
nir_pop_if(&b, NULL);
523
create_layout(struct radv_device *device)
526
VkDescriptorSetLayoutCreateInfo ds_create_info = {
527
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
528
.flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
530
.pBindings = (VkDescriptorSetLayoutBinding[]){
532
.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
533
.descriptorCount = 1,
534
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
535
.pImmutableSamplers = NULL},
537
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
538
.descriptorCount = 1,
539
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
540
.pImmutableSamplers = NULL},
543
result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info,
544
&device->meta_state.alloc,
545
&device->meta_state.etc_decode.ds_layout);
546
if (result != VK_SUCCESS)
549
VkPipelineLayoutCreateInfo pl_create_info = {
550
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
552
.pSetLayouts = &device->meta_state.etc_decode.ds_layout,
553
.pushConstantRangeCount = 1,
554
.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 20},
558
radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info,
559
&device->meta_state.alloc, &device->meta_state.etc_decode.p_layout);
560
if (result != VK_SUCCESS)
568
create_decode_pipeline(struct radv_device *device, VkPipeline *pipeline)
572
mtx_lock(&device->meta_state.mtx);
574
mtx_unlock(&device->meta_state.mtx);
578
nir_shader *cs = build_shader(device);
582
VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
583
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
584
.stage = VK_SHADER_STAGE_COMPUTE_BIT,
585
.module = vk_shader_module_handle_from_nir(cs),
587
.pSpecializationInfo = NULL,
590
VkComputePipelineCreateInfo vk_pipeline_info = {
591
.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
592
.stage = pipeline_shader_stage,
594
.layout = device->meta_state.resolve_compute.p_layout,
597
result = radv_CreateComputePipelines(radv_device_to_handle(device),
598
radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
599
&vk_pipeline_info, NULL, pipeline);
600
if (result != VK_SUCCESS)
604
mtx_unlock(&device->meta_state.mtx);
608
mtx_unlock(&device->meta_state.mtx);
613
radv_device_init_meta_etc_decode_state(struct radv_device *device, bool on_demand)
615
struct radv_meta_state *state = &device->meta_state;
618
if (!device->physical_device->emulate_etc2)
621
res = create_layout(device);
622
if (res != VK_SUCCESS)
628
return create_decode_pipeline(device, &state->etc_decode.pipeline);
632
radv_device_finish_meta_etc_decode_state(struct radv_device *device)
634
struct radv_meta_state *state = &device->meta_state;
635
radv_DestroyPipeline(radv_device_to_handle(device), state->etc_decode.pipeline, &state->alloc);
636
radv_DestroyPipelineLayout(radv_device_to_handle(device), state->etc_decode.p_layout,
638
device->vk.dispatch_table.DestroyDescriptorSetLayout(radv_device_to_handle(device),
639
state->etc_decode.ds_layout, &state->alloc);
643
radv_get_etc_decode_pipeline(struct radv_cmd_buffer *cmd_buffer)
645
struct radv_device *device = cmd_buffer->device;
646
VkPipeline *pipeline = &device->meta_state.etc_decode.pipeline;
651
ret = create_decode_pipeline(device, pipeline);
652
if (ret != VK_SUCCESS) {
653
cmd_buffer->record_result = ret;
654
return VK_NULL_HANDLE;
662
decode_etc(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *src_iview,
663
struct radv_image_view *dest_iview, const VkOffset3D *offset, const VkExtent3D *extent)
665
struct radv_device *device = cmd_buffer->device;
667
radv_meta_push_descriptor_set(
668
cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.resolve_compute.p_layout,
670
2, /* descriptorWriteCount */
671
(VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
673
.dstArrayElement = 0,
674
.descriptorCount = 1,
675
.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
677
(VkDescriptorImageInfo[]){
678
{.sampler = VK_NULL_HANDLE,
679
.imageView = radv_image_view_to_handle(src_iview),
680
.imageLayout = VK_IMAGE_LAYOUT_GENERAL},
682
{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
684
.dstArrayElement = 0,
685
.descriptorCount = 1,
686
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
687
.pImageInfo = (VkDescriptorImageInfo[]){
689
.sampler = VK_NULL_HANDLE,
690
.imageView = radv_image_view_to_handle(dest_iview),
691
.imageLayout = VK_IMAGE_LAYOUT_GENERAL,
695
VkPipeline pipeline = radv_get_etc_decode_pipeline(cmd_buffer);
697
radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
700
unsigned push_constants[5] = {
701
offset->x, offset->y, offset->z, src_iview->image->vk.format, src_iview->image->vk.image_type,
704
radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
705
device->meta_state.resolve_compute.p_layout, VK_SHADER_STAGE_COMPUTE_BIT,
706
0, 20, push_constants);
707
radv_unaligned_dispatch(cmd_buffer, extent->width, extent->height, extent->depth);
711
radv_meta_decode_etc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
712
VkImageLayout layout, const VkImageSubresourceLayers *subresource,
713
VkOffset3D offset, VkExtent3D extent)
715
struct radv_meta_saved_state saved_state;
716
radv_meta_save(&saved_state, cmd_buffer,
717
RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_CONSTANTS |
718
RADV_META_SAVE_DESCRIPTORS | RADV_META_SUSPEND_PREDICATING);
720
uint32_t base_slice = radv_meta_get_iview_layer(image, subresource, &offset);
721
uint32_t slice_count = image->vk.image_type == VK_IMAGE_TYPE_3D ? extent.depth : subresource->layerCount;
723
extent = vk_image_sanitize_extent(&image->vk, extent);
724
offset = vk_image_sanitize_offset(&image->vk, offset);
726
VkFormat load_format = vk_format_get_blocksize(image->vk.format) == 16
727
? VK_FORMAT_R32G32B32A32_UINT
728
: VK_FORMAT_R32G32_UINT;
729
struct radv_image_view src_iview;
730
radv_image_view_init(
731
&src_iview, cmd_buffer->device,
732
&(VkImageViewCreateInfo){
733
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
734
.image = radv_image_to_handle(image),
735
.viewType = radv_meta_get_view_type(image),
736
.format = load_format,
739
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
740
.baseMipLevel = subresource->mipLevel,
743
.layerCount = subresource->baseArrayLayer + subresource->layerCount,
748
VkFormat store_format;
749
switch (image->vk.format) {
750
case VK_FORMAT_EAC_R11_UNORM_BLOCK:
751
store_format = VK_FORMAT_R16_UNORM;
753
case VK_FORMAT_EAC_R11_SNORM_BLOCK:
754
store_format = VK_FORMAT_R16_SNORM;
756
case VK_FORMAT_EAC_R11G11_UNORM_BLOCK:
757
store_format = VK_FORMAT_R16G16_UNORM;
759
case VK_FORMAT_EAC_R11G11_SNORM_BLOCK:
760
store_format = VK_FORMAT_R16G16_SNORM;
763
store_format = VK_FORMAT_R8G8B8A8_UNORM;
765
struct radv_image_view dest_iview;
766
radv_image_view_init(
767
&dest_iview, cmd_buffer->device,
768
&(VkImageViewCreateInfo){
769
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
770
.image = radv_image_to_handle(image),
771
.viewType = radv_meta_get_view_type(image),
772
.format = store_format,
775
.aspectMask = VK_IMAGE_ASPECT_PLANE_1_BIT,
776
.baseMipLevel = subresource->mipLevel,
779
.layerCount = subresource->baseArrayLayer + subresource->layerCount,
784
decode_etc(cmd_buffer, &src_iview, &dest_iview, &(VkOffset3D){offset.x, offset.y, base_slice},
785
&(VkExtent3D){extent.width, extent.height, slice_count});
787
radv_image_view_finish(&src_iview);
788
radv_image_view_finish(&dest_iview);
790
radv_meta_restore(&saved_state, cmd_buffer);