/**************************************************************************
 *
 * Copyright 2010 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 **************************************************************************/
/**
 * @file
 * YUV pixel format manipulation.
 *
 * @author Jose Fonseca <jfonseca@vmware.com>
 */
#include "util/format/u_format.h"
38
#include "util/u_cpu_detect.h"
40
#include "lp_bld_arit.h"
41
#include "lp_bld_type.h"
42
#include "lp_bld_const.h"
43
#include "lp_bld_conv.h"
44
#include "lp_bld_gather.h"
45
#include "lp_bld_format.h"
46
#include "lp_bld_init.h"
47
#include "lp_bld_logic.h"
/**
 * Extract Y, U, V channels from packed UYVY.
 *
 * @param packed  is a <n x i32> vector with the packed UYVY blocks
 * @param i       is a <n x i32> vector with the x pixel coordinate (0 or 1)
 */
uyvy_to_yuv_soa(struct gallivm_state *gallivm,
63
LLVMBuilderRef builder = gallivm->builder;
67
memset(&type, 0, sizeof type);
71
assert(lp_check_value(type, packed));
72
assert(lp_check_value(type, i));
76
* y = (uyvy >> (16*i + 8)) & 0xff
78
* v = (uyvy >> 16 ) & 0xff
81
* y = (uyvy >> (-16*i + 16)) & 0xff
82
* u = (uyvy >> 24) & 0xff
83
* v = (uyvy >> 8) & 0xff
86
#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
88
* Avoid shift with per-element count.
89
* No support on x86, gets translated to roughly 5 instructions
90
* per element. Didn't measure performance but cuts shader size
91
* by quite a bit (less difference if cpu has no sse4.1 support).
93
if (util_get_cpu_caps()->has_sse2 && n > 1) {
94
LLVMValueRef sel, tmp, tmp2;
95
struct lp_build_context bld32;
97
lp_build_context_init(&bld32, gallivm, type);
99
tmp = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 8), "");
100
tmp2 = LLVMBuildLShr(builder, tmp, lp_build_const_int_vec(gallivm, type, 16), "");
101
sel = lp_build_compare(gallivm, type, PIPE_FUNC_EQUAL, i, lp_build_const_int_vec(gallivm, type, 0));
102
*y = lp_build_select(&bld32, sel, tmp, tmp2);
107
#if UTIL_ARCH_LITTLE_ENDIAN
108
shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(gallivm, type, 16), "");
109
shift = LLVMBuildAdd(builder, shift, lp_build_const_int_vec(gallivm, type, 8), "");
111
shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(gallivm, type, -16), "");
112
shift = LLVMBuildAdd(builder, shift, lp_build_const_int_vec(gallivm, type, 16), "");
114
*y = LLVMBuildLShr(builder, packed, shift, "");
117
#if UTIL_ARCH_LITTLE_ENDIAN
119
*v = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 16), "");
121
*u = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 24), "");
122
*v = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 8), "");
125
mask = lp_build_const_int_vec(gallivm, type, 0xff);
127
*y = LLVMBuildAnd(builder, *y, mask, "y");
128
*u = LLVMBuildAnd(builder, *u, mask, "u");
129
*v = LLVMBuildAnd(builder, *v, mask, "v");
/**
 * Extract Y, U, V channels from packed YUYV.
 *
 * @param packed  is a <n x i32> vector with the packed YUYV blocks
 * @param i       is a <n x i32> vector with the x pixel coordinate (0 or 1)
 */
yuyv_to_yuv_soa(struct gallivm_state *gallivm,
147
LLVMBuilderRef builder = gallivm->builder;
151
memset(&type, 0, sizeof type);
155
assert(lp_check_value(type, packed));
156
assert(lp_check_value(type, i));
160
* y = (yuyv >> 16*i) & 0xff
161
* u = (yuyv >> 8 ) & 0xff
162
* v = (yuyv >> 24 ) & 0xff
165
* y = (yuyv >> (-16*i + 24) & 0xff
166
* u = (yuyv >> 16) & 0xff
170
#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
172
* Avoid shift with per-element count.
173
* No support on x86, gets translated to roughly 5 instructions
174
* per element. Didn't measure performance but cuts shader size
175
* by quite a bit (less difference if cpu has no sse4.1 support).
177
if (util_get_cpu_caps()->has_sse2 && n > 1) {
178
LLVMValueRef sel, tmp;
179
struct lp_build_context bld32;
181
lp_build_context_init(&bld32, gallivm, type);
183
tmp = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 16), "");
184
sel = lp_build_compare(gallivm, type, PIPE_FUNC_EQUAL, i, lp_build_const_int_vec(gallivm, type, 0));
185
*y = lp_build_select(&bld32, sel, packed, tmp);
190
#if UTIL_ARCH_LITTLE_ENDIAN
191
shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(gallivm, type, 16), "");
193
shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(gallivm, type, -16), "");
194
shift = LLVMBuildAdd(builder, shift, lp_build_const_int_vec(gallivm, type, 24), "");
196
*y = LLVMBuildLShr(builder, packed, shift, "");
199
#if UTIL_ARCH_LITTLE_ENDIAN
200
*u = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 8), "");
201
*v = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 24), "");
203
*u = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 16), "");
207
mask = lp_build_const_int_vec(gallivm, type, 0xff);
209
*y = LLVMBuildAnd(builder, *y, mask, "y");
210
*u = LLVMBuildAnd(builder, *u, mask, "u");
211
*v = LLVMBuildAnd(builder, *v, mask, "v");
216
/**
 * Convert Y, U, V to R, G, B, using fixed-point integer arithmetic.
 *
 * Inputs and outputs are <n x i32> vectors holding 8-bit channel values.
 */
static void
yuv_to_rgb_soa(struct gallivm_state *gallivm,
               unsigned n,
               LLVMValueRef y, LLVMValueRef u, LLVMValueRef v,
               LLVMValueRef *r, LLVMValueRef *g, LLVMValueRef *b)
{
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_type type;
   struct lp_build_context bld;

   LLVMValueRef c0;
   LLVMValueRef c8;
   LLVMValueRef c16;
   LLVMValueRef c128;
   LLVMValueRef c255;

   LLVMValueRef cy;
   LLVMValueRef cug;
   LLVMValueRef cub;
   LLVMValueRef cvr;
   LLVMValueRef cvg;

   /* signed <n x i32> — intermediate products can be negative */
   memset(&type, 0, sizeof type);
   type.sign = TRUE;
   type.width = 32;
   type.length = n;

   lp_build_context_init(&bld, gallivm, type);

   assert(lp_check_value(type, y));
   assert(lp_check_value(type, u));
   assert(lp_check_value(type, v));

   /*
    * Constants
    */

   c0   = lp_build_const_int_vec(gallivm, type, 0);
   c8   = lp_build_const_int_vec(gallivm, type, 8);
   c16  = lp_build_const_int_vec(gallivm, type, 16);
   c128 = lp_build_const_int_vec(gallivm, type, 128);
   c255 = lp_build_const_int_vec(gallivm, type, 255);

   /* 8.8 fixed-point conversion coefficients */
   cy  = lp_build_const_int_vec(gallivm, type, 298);
   cug = lp_build_const_int_vec(gallivm, type, -100);
   cub = lp_build_const_int_vec(gallivm, type, 516);
   cvr = lp_build_const_int_vec(gallivm, type, 409);
   cvg = lp_build_const_int_vec(gallivm, type, -208);

   /*
    *  y -= 16;
    *  u -= 128;
    *  v -= 128;
    */

   y = LLVMBuildSub(builder, y, c16, "");
   u = LLVMBuildSub(builder, u, c128, "");
   v = LLVMBuildSub(builder, v, c128, "");

   /*
    * r = 298 * _y            + 409 * _v + 128;
    * g = 298 * _y - 100 * _u - 208 * _v + 128;
    * b = 298 * _y + 516 * _u            + 128;
    */

   /* fold the shared "298*_y + 128" term into y */
   y = LLVMBuildMul(builder, y, cy, "");
   y = LLVMBuildAdd(builder, y, c128, "");

   *r = LLVMBuildMul(builder, v, cvr, "");
   *g = LLVMBuildAdd(builder,
                     LLVMBuildMul(builder, u, cug, ""),
                     LLVMBuildMul(builder, v, cvg, ""),
                     "");
   *b = LLVMBuildMul(builder, u, cub, "");

   *r = LLVMBuildAdd(builder, *r, y, "");
   *g = LLVMBuildAdd(builder, *g, y, "");
   *b = LLVMBuildAdd(builder, *b, y, "");

   /*
    * r >>= 8;
    * g >>= 8;
    * b >>= 8;
    */

   *r = LLVMBuildAShr(builder, *r, c8, "r");
   *g = LLVMBuildAShr(builder, *g, c8, "g");
   *b = LLVMBuildAShr(builder, *b, c8, "b");

   /*
    * Clamp to [0, 255]
    */

   *r = lp_build_clamp(&bld, *r, c0, c255);
   *g = lp_build_clamp(&bld, *g, c0, c255);
   *b = lp_build_clamp(&bld, *b, c0, c255);
}
315
/**
 * Pack separate R, G, B channel vectors into a <4n x i8> RGBA AoS vector,
 * with alpha forced to 0xff.
 */
static LLVMValueRef
rgb_to_rgba_aos(struct gallivm_state *gallivm,
                unsigned n,
                LLVMValueRef r, LLVMValueRef g, LLVMValueRef b)
{
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_type type;
   LLVMValueRef a;
   LLVMValueRef rgba;

   memset(&type, 0, sizeof type);
   type.sign = TRUE;
   type.width = 32;
   type.length = n;

   assert(lp_check_value(type, r));
   assert(lp_check_value(type, g));
   assert(lp_check_value(type, b));

   /*
    * Make a 4 x unorm8 vector
    */

#if UTIL_ARCH_LITTLE_ENDIAN
   /* r stays in the low byte */
   g = LLVMBuildShl(builder, g, lp_build_const_int_vec(gallivm, type, 8), "");
   b = LLVMBuildShl(builder, b, lp_build_const_int_vec(gallivm, type, 16), "");
   a = lp_build_const_int_vec(gallivm, type, 0xff000000);
#else
   r = LLVMBuildShl(builder, r, lp_build_const_int_vec(gallivm, type, 24), "");
   g = LLVMBuildShl(builder, g, lp_build_const_int_vec(gallivm, type, 16), "");
   b = LLVMBuildShl(builder, b, lp_build_const_int_vec(gallivm, type, 8), "");
   a = lp_build_const_int_vec(gallivm, type, 0x000000ff);
#endif

   rgba = r;
   rgba = LLVMBuildOr(builder, rgba, g, "");
   rgba = LLVMBuildOr(builder, rgba, b, "");
   rgba = LLVMBuildOr(builder, rgba, a, "");

   /* reinterpret <n x i32> as <4n x i8> */
   rgba = LLVMBuildBitCast(builder, rgba,
                           LLVMVectorType(LLVMInt8TypeInContext(gallivm->context), 4*n), "");

   return rgba;
}
361
* Convert from <n x i32> packed UYVY to <4n x i8> RGBA AoS
364
uyvy_to_rgba_aos(struct gallivm_state *gallivm,
369
LLVMValueRef y, u, v;
370
LLVMValueRef r, g, b;
373
uyvy_to_yuv_soa(gallivm, n, packed, i, &y, &u, &v);
374
yuv_to_rgb_soa(gallivm, n, y, u, v, &r, &g, &b);
375
rgba = rgb_to_rgba_aos(gallivm, n, r, g, b);
382
* Convert from <n x i32> packed YUYV to <4n x i8> RGBA AoS
385
yuyv_to_rgba_aos(struct gallivm_state *gallivm,
390
LLVMValueRef y, u, v;
391
LLVMValueRef r, g, b;
394
yuyv_to_yuv_soa(gallivm, n, packed, i, &y, &u, &v);
395
yuv_to_rgb_soa(gallivm, n, y, u, v, &r, &g, &b);
396
rgba = rgb_to_rgba_aos(gallivm, n, r, g, b);
403
* Convert from <n x i32> packed RG_BG to <4n x i8> RGBA AoS
406
rgbg_to_rgba_aos(struct gallivm_state *gallivm,
411
LLVMValueRef r, g, b;
414
uyvy_to_yuv_soa(gallivm, n, packed, i, &g, &r, &b);
415
rgba = rgb_to_rgba_aos(gallivm, n, r, g, b);
422
* Convert from <n x i32> packed GR_GB to <4n x i8> RGBA AoS
425
grgb_to_rgba_aos(struct gallivm_state *gallivm,
430
LLVMValueRef r, g, b;
433
yuyv_to_yuv_soa(gallivm, n, packed, i, &g, &r, &b);
434
rgba = rgb_to_rgba_aos(gallivm, n, r, g, b);
440
* Convert from <n x i32> packed GR_BR to <4n x i8> RGBA AoS
443
grbr_to_rgba_aos(struct gallivm_state *gallivm,
448
LLVMValueRef r, g, b;
451
uyvy_to_yuv_soa(gallivm, n, packed, i, &r, &g, &b);
452
rgba = rgb_to_rgba_aos(gallivm, n, r, g, b);
459
* Convert from <n x i32> packed RG_RB to <4n x i8> RGBA AoS
462
rgrb_to_rgba_aos(struct gallivm_state *gallivm,
467
LLVMValueRef r, g, b;
470
yuyv_to_yuv_soa(gallivm, n, packed, i, &r, &g, &b);
471
rgba = rgb_to_rgba_aos(gallivm, n, r, g, b);
477
* @param n is the number of pixels processed
478
* @param packed is a <n x i32> vector with the packed YUYV blocks
479
* @param i is a <n x i32> vector with the x pixel coordinate (0 or 1)
480
* @return a <4*n x i8> vector with the pixel RGBA values in AoS
483
lp_build_fetch_subsampled_rgba_aos(struct gallivm_state *gallivm,
484
const struct util_format_description *format_desc,
486
LLVMValueRef base_ptr,
493
struct lp_type fetch_type;
495
assert(format_desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED);
496
assert(format_desc->block.bits == 32);
497
assert(format_desc->block.width == 2);
498
assert(format_desc->block.height == 1);
500
fetch_type = lp_type_uint(32);
501
packed = lp_build_gather(gallivm, n, 32, fetch_type, TRUE, base_ptr, offset, FALSE);
505
switch (format_desc->format) {
506
case PIPE_FORMAT_UYVY:
507
rgba = uyvy_to_rgba_aos(gallivm, n, packed, i);
509
case PIPE_FORMAT_YUYV:
510
rgba = yuyv_to_rgba_aos(gallivm, n, packed, i);
512
case PIPE_FORMAT_R8G8_B8G8_UNORM:
513
rgba = rgbg_to_rgba_aos(gallivm, n, packed, i);
515
case PIPE_FORMAT_G8R8_G8B8_UNORM:
516
rgba = grgb_to_rgba_aos(gallivm, n, packed, i);
518
case PIPE_FORMAT_G8R8_B8R8_UNORM:
519
rgba = grbr_to_rgba_aos(gallivm, n, packed, i);
521
case PIPE_FORMAT_R8G8_R8B8_UNORM:
522
rgba = rgrb_to_rgba_aos(gallivm, n, packed, i);
526
rgba = LLVMGetUndef(LLVMVectorType(LLVMInt8TypeInContext(gallivm->context), 4*n));