1
/**************************************************************************
3
* Copyright 2011 Christian König
6
* Permission is hereby granted, free of charge, to any person obtaining a
7
* copy of this software and associated documentation files (the
8
* "Software"), to deal in the Software without restriction, including
9
* without limitation the rights to use, copy, modify, merge, publish,
10
* distribute, sub license, and/or sell copies of the Software, and to
11
* permit persons to whom the Software is furnished to do so, subject to
12
* the following conditions:
14
* The above copyright notice and this permission notice (including the
15
* next paragraph) shall be included in all copies or substantial portions
18
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21
* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26
**************************************************************************/
30
#include "pipe/p_screen.h"
31
#include "pipe/p_context.h"
33
#include "util/u_draw.h"
34
#include "util/u_sampler.h"
35
#include "util/u_inlines.h"
36
#include "util/u_memory.h"
38
#include "tgsi/tgsi_ureg.h"
40
#include "vl_defines.h"
44
#include "vl_vertex_buffers.h"
52
/* 16-entry zig-zag scan table; the visible initializer lists each of the
 * values 0..15 exactly once.
 * NOTE(review): presumably indexes a 4x4 block stored row-major - confirm. */
const int vl_zscan_normal_16[] =
54
/* Zig-Zag scan pattern */
55
0, 1, 4, 8, 5, 2, 3, 6,
56
9,12,13,10, 7,11,14,15
59
/* 64-entry identity scan table: entry k holds the value k for k = 0..63,
 * i.e. coefficients are visited in storage order. */
const int vl_zscan_linear[] =
61
/* Linear scan pattern */
62
0, 1, 2, 3, 4, 5, 6, 7,
63
8, 9,10,11,12,13,14,15,
64
16,17,18,19,20,21,22,23,
65
24,25,26,27,28,29,30,31,
66
32,33,34,35,36,37,38,39,
67
40,41,42,43,44,45,46,47,
68
48,49,50,51,52,53,54,55,
69
56,57,58,59,60,61,62,63
72
/* 64-entry zig-zag scan table.
 * NOTE(review): presumably each entry is the row-major index (x + 8 * y)
 * inside an 8x8 block - confirm against the users of this table. */
const int vl_zscan_normal[] =
74
/* Zig-Zag scan pattern */
75
0, 1, 8,16, 9, 2, 3,10,
76
17,24,32,25,18,11, 4, 5,
77
12,19,26,33,40,48,41,34,
78
27,20,13, 6, 7,14,21,28,
79
35,42,49,56,57,50,43,36,
80
29,22,15,23,30,37,44,51,
81
58,59,52,45,38,31,39,46,
82
53,60,61,54,47,55,62,63
85
/* 64-entry "alternate" scan table; it starts by walking down the first
 * column (0, 8, 16, 24) before moving right.
 * NOTE(review): presumably row-major indices inside an 8x8 block - confirm. */
const int vl_zscan_alternate[] =
87
/* Alternate scan pattern */
88
0, 8,16,24, 1, 9, 2,10,
89
17,25,32,40,48,56,57,49,
90
41,33,26,18, 3,11, 4,12,
91
19,27,34,42,50,58,35,43,
92
51,59,20,28, 5,13, 6,14,
93
21,29,36,44,52,60,37,45,
94
53,61,22,30, 7,15,23,31,
95
38,46,54,62,39,47,55,63
98
/* 16-entry scan table for the H.265 up-right diagonal order; the visible
 * initializer lists each of the values 0..15 exactly once. */
const int vl_zscan_h265_up_right_diagonal_16[] =
100
/* Up-right diagonal scan order for 4x4 blocks - see H.265 section 6.5.3. */
101
0, 4, 1, 8, 5, 2, 12, 9,
102
6, 3, 13, 10, 7, 14, 11, 15,
105
/* 64-entry scan table for the H.265 up-right diagonal order (8x8 variant
 * of the 16-entry table with the same name prefix). */
const int vl_zscan_h265_up_right_diagonal[] =
107
/* Up-right diagonal scan order for 8x8 blocks - see H.265 section 6.5.3. */
108
0, 8, 1, 16, 9, 2, 24, 17,
109
10, 3, 32, 25, 18, 11, 4, 40,
110
33, 26, 19, 12, 5, 48, 41, 34,
111
27, 20, 13, 6, 56, 49, 42, 35,
112
28, 21, 14, 7, 57, 50, 43, 36,
113
29, 22, 15, 58, 51, 44, 37, 30,
114
23, 59, 52, 45, 38, 31, 60, 53,
115
46, 39, 61, 54, 47, 62, 55, 63,
120
/*
 * Build the vertex shader for the zscan pass with the ureg builder.
 *
 * Reads zscan->num_channels, buffer_width, buffer_height, blocks_per_line
 * and blocks_total and bakes them into the shader as float immediates.
 * Declares one position output plus one generic output (VS_O_VTEX + i)
 * per channel.
 *
 * Returns whatever ureg_create_shader_and_destroy() yields for zscan->pipe.
 */
create_vert_shader(struct vl_zscan *zscan)
122
struct ureg_program *shader;
123
struct ureg_src scale;
124
struct ureg_src vrect, vpos, block_num;
126
struct ureg_dst o_vpos;
127
struct ureg_dst *o_vtex;
130
shader = ureg_create(PIPE_SHADER_VERTEX);
134
/* One output register handle per channel.
 * NOTE(review): confirm the allocation is NULL-checked and freed before
 * the function returns. */
o_vtex = MALLOC(zscan->num_channels * sizeof(struct ureg_dst));
136
/* scale = size of one block relative to the whole buffer, per axis. */
scale = ureg_imm2f(shader,
137
(float)VL_BLOCK_WIDTH / zscan->buffer_width,
138
(float)VL_BLOCK_HEIGHT / zscan->buffer_height);
140
vrect = ureg_DECL_vs_input(shader, VS_I_RECT);
141
vpos = ureg_DECL_vs_input(shader, VS_I_VPOS);
142
block_num = ureg_DECL_vs_input(shader, VS_I_BLOCK_NUM);
144
tmp = ureg_DECL_temporary(shader);
146
o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS);
148
for (i = 0; i < zscan->num_channels; ++i)
149
o_vtex[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTEX + i);
152
* o_vpos.xy = (vpos + vrect) * scale
155
* tmp.xy = InstanceID / blocks_per_line
156
* tmp.x = frac(tmp.x)
157
* tmp.y = floor(tmp.y)
159
* o_vtex.x = vrect.x / blocks_per_line + tmp.x
161
* o_vtex.z = tmp.z * blocks_per_line / blocks_total
163
/* Position: (vpos + vrect) * scale in xy, constant 1.0 in z and w. */
ureg_ADD(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XY), vpos, vrect);
164
ureg_MUL(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(tmp), scale);
165
ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), ureg_imm1f(shader, 1.0f));
167
/* tmp.x and tmp.w = block_num.x / blocks_per_line. */
ureg_MUL(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XW), ureg_scalar(block_num, TGSI_SWIZZLE_X),
168
ureg_imm1f(shader, 1.0f / zscan->blocks_per_line));
170
/* tmp.y = frac(tmp.x); tmp.w = floor(tmp.w).
 * NOTE(review): the pseudo-code above names tmp.x / tmp.y for these two
 * results, while the instructions write tmp.y / tmp.w. */
ureg_FRC(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
171
ureg_FLR(shader, ureg_writemask(tmp, TGSI_WRITEMASK_W), ureg_src(tmp));
173
for (i = 0; i < zscan->num_channels; ++i) {
174
/* tmp.x = tmp.y + (i - num_channels / 2) / (blocks_per_line * VL_BLOCK_WIDTH):
 * a per-channel horizontal offset centred on channel num_channels / 2. */
ureg_ADD(shader, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y),
175
ureg_imm1f(shader, 1.0f / (zscan->blocks_per_line * VL_BLOCK_WIDTH)
176
* ((signed)i - (signed)zscan->num_channels / 2)));
178
/* o_vtex[i].x = vrect.x / blocks_per_line + tmp.x
 * o_vtex[i].y = vrect.y
 * o_vtex[i].z = vpos.z
 * o_vtex[i].w = tmp.w * blocks_per_line / blocks_total */
ureg_MAD(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_X), vrect,
179
ureg_imm1f(shader, 1.0f / zscan->blocks_per_line), ureg_src(tmp));
180
ureg_MOV(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_Y), vrect);
181
ureg_MOV(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_Z), vpos);
182
ureg_MUL(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_W), ureg_src(tmp),
183
ureg_imm1f(shader, (float)zscan->blocks_per_line / zscan->blocks_total));
186
ureg_release_temporary(shader, tmp);
191
/* Finalizes the program, creates the driver shader and destroys the builder. */
return ureg_create_shader_and_destroy(shader, zscan->pipe);
195
/*
 * Build the fragment shader for the zscan pass with the ureg builder.
 *
 * Declares one linearly interpolated generic input per channel
 * (VS_O_VTEX + i) and three samplers: 0 = source data, 1 = scan layout,
 * 2 = quantizer. Channel i of the colour output is computed from the
 * i-th input.
 *
 * Returns whatever ureg_create_shader_and_destroy() yields for zscan->pipe.
 */
create_frag_shader(struct vl_zscan *zscan)
197
struct ureg_program *shader;
198
struct ureg_src *vtex;
200
struct ureg_src samp_src, samp_scan, samp_quant;
202
struct ureg_dst *tmp;
203
struct ureg_dst quant, fragment;
207
shader = ureg_create(PIPE_SHADER_FRAGMENT);
211
/* Per-channel arrays of input and temporary register handles.
 * NOTE(review): confirm both allocations are NULL-checked and freed. */
vtex = MALLOC(zscan->num_channels * sizeof(struct ureg_src));
212
tmp = MALLOC(zscan->num_channels * sizeof(struct ureg_dst));
214
for (i = 0; i < zscan->num_channels; ++i)
215
vtex[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTEX + i, TGSI_INTERPOLATE_LINEAR);
217
samp_src = ureg_DECL_sampler(shader, 0);
218
samp_scan = ureg_DECL_sampler(shader, 1);
219
samp_quant = ureg_DECL_sampler(shader, 2);
221
for (i = 0; i < zscan->num_channels; ++i)
222
tmp[i] = ureg_DECL_temporary(shader);
223
quant = ureg_DECL_temporary(shader);
225
fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
228
* tmp.x = tex(vtex, 1)
230
* fragment = tex(tmp, 0) * quant
232
/* tmp[i].x = scan-layout texture sampled at vtex[i]. */
for (i = 0; i < zscan->num_channels; ++i)
233
ureg_TEX(shader, ureg_writemask(tmp[i], TGSI_WRITEMASK_X), TGSI_TEXTURE_2D, vtex[i], samp_scan);
235
/* tmp[i].y = vtex[i].w, completing the 2D coordinate held in tmp[i].xy. */
for (i = 0; i < zscan->num_channels; ++i)
236
ureg_MOV(shader, ureg_writemask(tmp[i], TGSI_WRITEMASK_Y), ureg_scalar(vtex[i], TGSI_SWIZZLE_W));
238
/* Component i of tmp[0] = source texture sampled at tmp[i];
 * component i of quant = 3D quantizer texture sampled at vtex[i].
 * NOTE(review): "TGSI_WRITEMASK_X << i" assumes the writemask bits are
 * consecutive starting at X, so num_channels must not exceed 4 - confirm. */
for (i = 0; i < zscan->num_channels; ++i) {
239
ureg_TEX(shader, ureg_writemask(tmp[0], TGSI_WRITEMASK_X << i), TGSI_TEXTURE_2D, ureg_src(tmp[i]), samp_src);
240
ureg_TEX(shader, ureg_writemask(quant, TGSI_WRITEMASK_X << i), TGSI_TEXTURE_3D, vtex[i], samp_quant);
243
/* fragment = tmp[0] * (quant * 16). */
ureg_MUL(shader, quant, ureg_src(quant), ureg_imm1f(shader, 16.0f));
244
ureg_MUL(shader, fragment, ureg_src(tmp[0]), ureg_src(quant));
246
for (i = 0; i < zscan->num_channels; ++i)
247
ureg_release_temporary(shader, tmp[i]);
253
/* Finalizes the program, creates the driver shader and destroys the builder. */
return ureg_create_shader_and_destroy(shader, zscan->pipe);
257
/*
 * Create the vertex and fragment shaders and store them in zscan->vs and
 * zscan->fs.
 * NOTE(review): the delete_vs_state() call at the end is presumably the
 * unwind path taken when fragment shader creation fails - confirm.
 */
init_shaders(struct vl_zscan *zscan)
261
zscan->vs = create_vert_shader(zscan);
265
zscan->fs = create_frag_shader(zscan);
272
zscan->pipe->delete_vs_state(zscan->pipe, zscan->vs);
279
/* Delete the vertex and fragment shader objects stored in zscan->vs and
 * zscan->fs through zscan->pipe. */
cleanup_shaders(struct vl_zscan *zscan)
283
zscan->pipe->delete_vs_state(zscan->pipe, zscan->vs);
284
zscan->pipe->delete_fs_state(zscan->pipe, zscan->fs);
288
/*
 * Create the fixed pipeline state objects used by the zscan pass and store
 * them in zscan: one rasterizer state, one blend state and three identical
 * sampler states (zscan->samplers[0..2]).
 *
 * The trailing delete_* calls release the objects again.
 * NOTE(review): presumably they form the error-unwind path - confirm which
 * label each failure jumps to and that a failed create is never passed to
 * its own delete_* call.
 */
init_state(struct vl_zscan *zscan)
290
struct pipe_blend_state blend;
291
struct pipe_rasterizer_state rs_state;
292
struct pipe_sampler_state sampler;
297
/* Rasterizer: everything zeroed except pixel-centre / edge rules and
 * near/far depth clipping. */
memset(&rs_state, 0, sizeof(rs_state));
298
rs_state.half_pixel_center = true;
299
rs_state.bottom_edge_rule = true;
300
rs_state.depth_clip_near = 1;
301
rs_state.depth_clip_far = 1;
303
zscan->rs_state = zscan->pipe->create_rasterizer_state(zscan->pipe, &rs_state);
304
if (!zscan->rs_state)
307
/* Blend: blending and logic ops are disabled; the func/factor fields are
 * still filled in, and all four colour channels are write-enabled. */
memset(&blend, 0, sizeof blend);
309
blend.independent_blend_enable = 0;
310
blend.rt[0].blend_enable = 0;
311
blend.rt[0].rgb_func = PIPE_BLEND_ADD;
312
blend.rt[0].rgb_src_factor = PIPE_BLENDFACTOR_ONE;
313
blend.rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_ONE;
314
blend.rt[0].alpha_func = PIPE_BLEND_ADD;
315
blend.rt[0].alpha_src_factor = PIPE_BLENDFACTOR_ONE;
316
blend.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ONE;
317
blend.logicop_enable = 0;
318
blend.logicop_func = PIPE_LOGICOP_CLEAR;
319
/* Needed to allow color writes to FB, even if blending disabled */
320
blend.rt[0].colormask = PIPE_MASK_RGBA;
322
zscan->blend = zscan->pipe->create_blend_state(zscan->pipe, &blend);
326
/* Samplers: nearest filtering, no mipmapping, normalized coordinates,
 * repeat wrapping in s/t and clamp-to-edge in r. All three are built from
 * the same template. */
for (i = 0; i < 3; ++i) {
327
memset(&sampler, 0, sizeof(sampler));
328
sampler.wrap_s = PIPE_TEX_WRAP_REPEAT;
329
sampler.wrap_t = PIPE_TEX_WRAP_REPEAT;
330
sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
331
sampler.min_img_filter = PIPE_TEX_FILTER_NEAREST;
332
sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
333
sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST;
334
sampler.compare_mode = PIPE_TEX_COMPARE_NONE;
335
sampler.compare_func = PIPE_FUNC_ALWAYS;
336
sampler.normalized_coords = 1;
337
zscan->samplers[i] = zscan->pipe->create_sampler_state(zscan->pipe, &sampler);
338
if (!zscan->samplers[i])
345
/* Only indices 0 and 1 are released here: if creation of sampler i fails,
 * at most samplers 0..1 exist.
 * NOTE(review): when the first or second creation fails, samplers[1] has
 * not been written by this function, so the NULL test below relies on the
 * caller having zeroed *zscan - confirm. */
for (i = 0; i < 2; ++i)
346
if (zscan->samplers[i])
347
zscan->pipe->delete_sampler_state(zscan->pipe, zscan->samplers[i]);
349
zscan->pipe->delete_rasterizer_state(zscan->pipe, zscan->rs_state);
352
zscan->pipe->delete_blend_state(zscan->pipe, zscan->blend);
359
/* Delete the three sampler states, the rasterizer state and the blend
 * state stored in zscan, using zscan->pipe. */
cleanup_state(struct vl_zscan *zscan)
365
for (i = 0; i < 3; ++i)
366
zscan->pipe->delete_sampler_state(zscan->pipe, zscan->samplers[i]);
368
zscan->pipe->delete_rasterizer_state(zscan->pipe, zscan->rs_state);
369
zscan->pipe->delete_blend_state(zscan->pipe, zscan->blend);
372
/*
 * Upload a scan layout as a texture and wrap it in a sampler view.
 *
 * pipe            - context used to create and map the texture.
 * layout          - 64-entry scan table; each entry is used as an index
 *                   into a 64-element array, so every value must lie in
 *                   0..63 (not checked here).
 * blocks_per_line - number of blocks laid out side by side in the texture;
 *                   must be non-zero (asserted).
 *
 * The texture is R32_FLOAT, (VL_BLOCK_WIDTH * blocks_per_line) texels wide
 * and VL_BLOCK_HEIGHT texels high.
 * NOTE(review): presumably returns the created sampler view, or NULL on
 * failure - confirm against the return statements.
 */
struct pipe_sampler_view *
373
vl_zscan_layout(struct pipe_context *pipe, const int layout[64], unsigned blocks_per_line)
375
const unsigned total_size = blocks_per_line * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT;
377
int patched_layout[64];
379
struct pipe_resource res_tmpl, *res;
380
struct pipe_sampler_view sv_tmpl, *sv;
381
struct pipe_transfer *buf_transfer;
382
unsigned x, y, i, pitch;
385
struct pipe_box rect =
388
VL_BLOCK_WIDTH * blocks_per_line,
393
assert(pipe && layout && blocks_per_line);
395
/* Invert the table: patched_layout[p] is the scan index whose entry is p. */
for (i = 0; i < 64; ++i)
396
patched_layout[layout[i]] = i;
398
memset(&res_tmpl, 0, sizeof(res_tmpl));
399
res_tmpl.target = PIPE_TEXTURE_2D;
400
res_tmpl.format = PIPE_FORMAT_R32_FLOAT;
401
res_tmpl.width0 = VL_BLOCK_WIDTH * blocks_per_line;
402
res_tmpl.height0 = VL_BLOCK_HEIGHT;
404
res_tmpl.array_size = 1;
405
res_tmpl.usage = PIPE_USAGE_IMMUTABLE;
406
res_tmpl.bind = PIPE_BIND_SAMPLER_VIEW;
408
res = pipe->screen->resource_create(pipe->screen, &res_tmpl);
412
f = pipe->texture_map(pipe, res,
413
0, PIPE_MAP_WRITE | PIPE_MAP_DISCARD_RANGE,
414
&rect, &buf_transfer);
418
/* Row pitch expressed in float elements rather than bytes. */
pitch = buf_transfer->stride / sizeof(float);
420
/* For block i, texel (x, y) receives the inverted scan index of (x, y)
 * offset by i full blocks (VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT each). */
for (i = 0; i < blocks_per_line; ++i)
421
for (y = 0; y < VL_BLOCK_HEIGHT; ++y)
422
for (x = 0; x < VL_BLOCK_WIDTH; ++x) {
423
float addr = patched_layout[x + y * VL_BLOCK_WIDTH] +
424
i * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT;
428
f[i * VL_BLOCK_WIDTH + y * pitch + x] = addr;
431
pipe->texture_unmap(pipe, buf_transfer);
433
memset(&sv_tmpl, 0, sizeof(sv_tmpl));
434
u_sampler_view_default_template(&sv_tmpl, res, res->format);
435
sv = pipe->create_sampler_view(pipe, res, &sv_tmpl);
436
/* Drop the local reference to the texture. */
pipe_resource_reference(&res, NULL);
443
/* NOTE(review): second release of res - presumably reached only on a
 * separate error path; confirm it cannot follow the release above. */
pipe_resource_reference(&res, NULL);
450
/*
 * Initialize a zscan object.
 *
 * Copies the buffer dimensions, channel count and block counts into
 * *zscan, then creates the shaders followed by the pipeline state. If
 * state creation fails the already created shaders are released again.
 *
 * zscan and pipe must be non-NULL (asserted).
 * NOTE(review): blocks_per_line, blocks_total, buffer_width and
 * buffer_height are used as divisors when the shaders are built, so they
 * are presumably required to be non-zero - confirm with callers.
 */
vl_zscan_init(struct vl_zscan *zscan, struct pipe_context *pipe,
451
unsigned buffer_width, unsigned buffer_height,
452
unsigned blocks_per_line, unsigned blocks_total,
453
unsigned num_channels)
455
assert(zscan && pipe);
458
zscan->buffer_width = buffer_width;
459
zscan->buffer_height = buffer_height;
460
zscan->num_channels = num_channels;
461
zscan->blocks_per_line = blocks_per_line;
462
zscan->blocks_total = blocks_total;
464
if(!init_shaders(zscan))
467
if(!init_state(zscan)) {
468
/* Undo init_shaders() so nothing is leaked on failure. */
cleanup_shaders(zscan);
476
/* Release everything created for zscan: the shaders first, then the
 * pipeline state objects. */
vl_zscan_cleanup(struct vl_zscan *zscan)
480
cleanup_shaders(zscan);
481
cleanup_state(zscan);
485
/*
 * Initialize a per-target zscan buffer.
 *
 * zscan  - owning zscan object; supplies the pipe and blocks_per_line.
 * buffer - structure to fill; it is zeroed first.
 * src    - sampler view with the source data; a reference is taken.
 * dst    - surface rendered to; a reference is taken and its size defines
 *          the viewport and framebuffer dimensions.
 *
 * Also creates the quantizer texture (3D, R8_UNORM,
 * VL_BLOCK_WIDTH * blocks_per_line wide and VL_BLOCK_HEIGHT high) and
 * stores a view of it in buffer->quant.
 *
 * NOTE(review): dst is dereferenced but not covered by the assert - confirm
 * callers never pass NULL.
 */
vl_zscan_init_buffer(struct vl_zscan *zscan, struct vl_zscan_buffer *buffer,
486
struct pipe_sampler_view *src, struct pipe_surface *dst)
488
struct pipe_resource res_tmpl, *res;
489
struct pipe_sampler_view sv_tmpl;
491
assert(zscan && buffer);
493
memset(buffer, 0, sizeof(struct vl_zscan_buffer));
495
pipe_sampler_view_reference(&buffer->src, src);
497
/* Viewport scaled to the destination size with no translation. */
buffer->viewport.scale[0] = dst->width;
498
buffer->viewport.scale[1] = dst->height;
499
buffer->viewport.scale[2] = 1;
500
buffer->viewport.translate[0] = 0;
501
buffer->viewport.translate[1] = 0;
502
buffer->viewport.translate[2] = 0;
503
buffer->viewport.swizzle_x = PIPE_VIEWPORT_SWIZZLE_POSITIVE_X;
504
buffer->viewport.swizzle_y = PIPE_VIEWPORT_SWIZZLE_POSITIVE_Y;
505
buffer->viewport.swizzle_z = PIPE_VIEWPORT_SWIZZLE_POSITIVE_Z;
506
buffer->viewport.swizzle_w = PIPE_VIEWPORT_SWIZZLE_POSITIVE_W;
508
/* Framebuffer with dst as its single colour buffer. */
buffer->fb_state.width = dst->width;
509
buffer->fb_state.height = dst->height;
510
buffer->fb_state.nr_cbufs = 1;
511
pipe_surface_reference(&buffer->fb_state.cbufs[0], dst);
513
memset(&res_tmpl, 0, sizeof(res_tmpl));
514
res_tmpl.target = PIPE_TEXTURE_3D;
515
res_tmpl.format = PIPE_FORMAT_R8_UNORM;
516
res_tmpl.width0 = VL_BLOCK_WIDTH * zscan->blocks_per_line;
517
res_tmpl.height0 = VL_BLOCK_HEIGHT;
519
res_tmpl.array_size = 1;
520
res_tmpl.usage = PIPE_USAGE_IMMUTABLE;
521
res_tmpl.bind = PIPE_BIND_SAMPLER_VIEW;
523
res = zscan->pipe->screen->resource_create(zscan->pipe->screen, &res_tmpl);
527
memset(&sv_tmpl, 0, sizeof(sv_tmpl));
528
u_sampler_view_default_template(&sv_tmpl, res, res->format);
529
/* Replicate the single stored channel into r, g, b and a. */
sv_tmpl.swizzle_r = sv_tmpl.swizzle_g = sv_tmpl.swizzle_b = sv_tmpl.swizzle_a = TGSI_SWIZZLE_X;
530
buffer->quant = zscan->pipe->create_sampler_view(zscan->pipe, res, &sv_tmpl);
531
/* Drop the local reference to the texture. */
pipe_resource_reference(&res, NULL);
539
/* Drop the references held by buffer: the src, layout and quant sampler
 * views and the colour buffer surface. Each pointer is reset by the
 * *_reference(..., NULL) helpers. */
vl_zscan_cleanup_buffer(struct vl_zscan_buffer *buffer)
543
pipe_sampler_view_reference(&buffer->src, NULL);
544
pipe_sampler_view_reference(&buffer->layout, NULL);
545
pipe_sampler_view_reference(&buffer->quant, NULL);
546
pipe_surface_reference(&buffer->fb_state.cbufs[0], NULL);
550
/* Make buffer use the given scan-layout sampler view. The assignment goes
 * through pipe_sampler_view_reference(), so reference counts of the old
 * and new views are adjusted accordingly. */
vl_zscan_set_layout(struct vl_zscan_buffer *buffer, struct pipe_sampler_view *layout)
555
pipe_sampler_view_reference(&buffer->layout, layout);
559
/*
 * Upload a quantizer matrix into the texture behind buffer->quant.
 *
 * zscan  - supplies blocks_per_line.
 * buffer - buffer whose quant texture is written.
 * matrix - 64 values indexed as matrix[x + y * VL_BLOCK_WIDTH].
 * intra  - NOTE(review): presumably selects which slice of the 3D texture
 *          is written; its use is not visible here - confirm.
 *
 * The same matrix is written once per block, blocks_per_line times side by
 * side along the x axis.
 */
vl_zscan_upload_quant(struct vl_zscan *zscan, struct vl_zscan_buffer *buffer,
560
const uint8_t matrix[64], bool intra)
562
struct pipe_context *pipe;
563
struct pipe_transfer *buf_transfer;
564
unsigned x, y, i, pitch;
567
struct pipe_box rect =
580
/* Widen the mapped box from one block to the whole line of blocks. */
rect.width *= zscan->blocks_per_line;
582
data = pipe->texture_map(pipe, buffer->quant->texture,
584
PIPE_MAP_DISCARD_RANGE,
585
&rect, &buf_transfer);
589
/* Stride is used directly as the element pitch.
 * NOTE(review): this assumes one byte per texel in the mapped data -
 * confirm against the texture format. */
pitch = buf_transfer->stride;
591
for (i = 0; i < zscan->blocks_per_line; ++i)
592
for (y = 0; y < VL_BLOCK_HEIGHT; ++y)
593
for (x = 0; x < VL_BLOCK_WIDTH; ++x)
594
data[i * VL_BLOCK_WIDTH + y * pitch + x] = matrix[x + y * VL_BLOCK_WIDTH];
596
pipe->texture_unmap(pipe, buf_transfer);
600
/*
 * Run the zscan pass for one buffer.
 *
 * Binds the rasterizer, blend and sampler states and both shaders from
 * zscan, sets the framebuffer, viewport and sampler views from buffer, and
 * then draws a 4-vertex quad num_instances times.
 */
vl_zscan_render(struct vl_zscan *zscan, struct vl_zscan_buffer *buffer, unsigned num_instances)
604
zscan->pipe->bind_rasterizer_state(zscan->pipe, zscan->rs_state);
605
zscan->pipe->bind_blend_state(zscan->pipe, zscan->blend);
606
zscan->pipe->bind_sampler_states(zscan->pipe, PIPE_SHADER_FRAGMENT,
607
0, 3, zscan->samplers);
608
zscan->pipe->set_framebuffer_state(zscan->pipe, &buffer->fb_state);
609
zscan->pipe->set_viewport_states(zscan->pipe, 0, 1, &buffer->viewport);
610
/* Three views are passed starting at &buffer->src.
 * NOTE(review): this assumes src, layout and quant are three consecutive
 * sampler-view pointers in struct vl_zscan_buffer, in that order - confirm
 * against the struct definition. */
zscan->pipe->set_sampler_views(zscan->pipe, PIPE_SHADER_FRAGMENT,
611
0, 3, 0, false, &buffer->src);
612
zscan->pipe->bind_vs_state(zscan->pipe, zscan->vs);
613
zscan->pipe->bind_fs_state(zscan->pipe, zscan->fs);
614
util_draw_arrays_instanced(zscan->pipe, PIPE_PRIM_QUADS, 0, 4, 0, num_instances);