193
149
{ 77, 23, 60, 15, 72, 21, 56, 14, },
197
static av_always_inline void
198
yuv2yuvX16_c_template(const int16_t *lumFilter, const int16_t **lumSrc,
199
int lumFilterSize, const int16_t *chrFilter,
200
const int16_t **chrUSrc, const int16_t **chrVSrc,
201
int chrFilterSize, const int16_t **alpSrc,
202
uint16_t *dest, uint16_t *uDest, uint16_t *vDest,
203
uint16_t *aDest, int dstW, int chrDstW,
204
int big_endian, int output_bits)
206
//FIXME Optimize (just quickly written not optimized..)
152
DECLARE_ALIGNED(8, const uint8_t, dither_8x8_128)[8][8] = {
153
{ 36, 68, 60, 92, 34, 66, 58, 90,},
154
{ 100, 4,124, 28, 98, 2,122, 26,},
155
{ 52, 84, 44, 76, 50, 82, 42, 74,},
156
{ 116, 20,108, 12,114, 18,106, 10,},
157
{ 32, 64, 56, 88, 38, 70, 62, 94,},
158
{ 96, 0,120, 24,102, 6,126, 30,},
159
{ 48, 80, 40, 72, 54, 86, 46, 78,},
160
{ 112, 16,104, 8,118, 22,110, 14,},
162
DECLARE_ALIGNED(8, const uint8_t, ff_sws_pb_64)[8] =
163
{ 64, 64, 64, 64, 64, 64, 64, 64 };
165
#define output_pixel(pos, val, bias, signedness) \
167
AV_WB16(pos, bias + av_clip_ ## signedness ## 16(val >> shift)); \
169
AV_WL16(pos, bias + av_clip_ ## signedness ## 16(val >> shift)); \
172
/*
 * Store one line of 32-bit intermediate samples into a 16-bit plane
 * without vertical filtering: each of the dstW samples is rounded,
 * shifted right by `shift`, clipped and written through output_pixel().
 *
 * src          input samples, one per output pixel
 * dest         output line of 16-bit samples
 * dstW         number of samples to write
 * big_endian   presumably selects the AV_WB16 vs. AV_WL16 store inside
 *              output_pixel() -- the selecting condition is not visible here
 * output_bits  target bit depth; determines the shift amount
 *
 * NOTE(review): this excerpt lacks the function's braces and the
 * declaration of `i`; confirm against the complete file.
 */
static av_always_inline void
173
yuv2plane1_16_c_template(const int32_t *src, uint16_t *dest, int dstW,
174
int big_endian, int output_bits)
177
/* Number of low-order bits dropped when the sample is stored. */
int shift = 19 - output_bits;
179
for (i = 0; i < dstW; i++) {
180
/* Add half of the dropped range so the later shift rounds to nearest. */
int val = src[i] + (1 << (shift - 1));
181
/* Bias 0, clipped with av_clip_uint16 after the shift. */
output_pixel(&dest[i], val, 0, uint);
185
/*
 * Vertically filter filterSize input lines into one 16-bit output line.
 * For every output sample i the sum of src[j][i] * filter[j] over all taps
 * is accumulated on top of a rounding term, then written through
 * output_pixel() with a bias of 0x8000 and a signed 16-bit clip.
 *
 * filter       filter coefficients, one per tap
 * filterSize   number of taps / input lines
 * src          array of filterSize input lines of 32-bit samples
 * dest         output line of 16-bit samples
 * dstW         number of samples to write
 * big_endian   presumably selects the AV_WB16 vs. AV_WL16 store inside
 *              output_pixel() -- the selecting condition is not visible here
 * output_bits  target bit depth; determines the shift amount
 *
 * NOTE(review): this excerpt lacks the function's braces and the
 * declarations of `i` and `j`; confirm against the complete file.
 */
static av_always_inline void
186
yuv2planeX_16_c_template(const int16_t *filter, int filterSize,
187
const int32_t **src, uint16_t *dest, int dstW,
188
int big_endian, int output_bits)
191
int shift = 15 + 16 - output_bits;
193
for (i = 0; i < dstW; i++) {
194
/* Rounding term: half of (1 << shift). */
int val = 1 << (30-output_bits);
197
/* range of val is [0,0x7FFFFFFF], so 31 bits, but with lanczos/spline
198
* filters (or anything with negative coeffs), the range can be slightly
199
* wider in both directions. To account for this overflow, we subtract
200
* a constant so it always fits in the signed range (assuming a
201
* reasonable filterSize), and re-add that at the end. */
/* NOTE(review): the subtraction described above is not visible in this
 * excerpt; the 0x8000 bias passed to output_pixel() below looks like the
 * matching re-add -- confirm against the complete file. */
203
for (j = 0; j < filterSize; j++)
204
val += src[j][i] * filter[j];
206
output_pixel(&dest[i], val, 0x8000, int);
212
#define output_pixel(pos, val) \
214
AV_WB16(pos, av_clip_uintp2(val >> shift, output_bits)); \
216
AV_WL16(pos, av_clip_uintp2(val >> shift, output_bits)); \
219
/*
 * Store one line of 16-bit intermediate samples into a plane of
 * output_bits-deep samples (held in 16-bit words) without vertical
 * filtering: each of the dstW samples is rounded and handed to the
 * two-argument output_pixel(), which shifts right by `shift` and clips
 * with av_clip_uintp2(..., output_bits) before storing.
 *
 * src          input samples, one per output pixel
 * dest         output line of 16-bit words
 * dstW         number of samples to write
 * big_endian   presumably selects the AV_WB16 vs. AV_WL16 store inside
 *              output_pixel() -- the selecting condition is not visible here
 * output_bits  target bit depth; determines shift amount and clip range
 *
 * NOTE(review): this excerpt lacks the function's braces and the
 * declaration of `i`; confirm against the complete file.
 */
static av_always_inline void
220
yuv2plane1_10_c_template(const int16_t *src, uint16_t *dest, int dstW,
221
int big_endian, int output_bits)
224
/* Number of low-order bits dropped when the sample is stored. */
int shift = 15 - output_bits;
226
for (i = 0; i < dstW; i++) {
227
/* Add half of the dropped range so the later shift rounds to nearest. */
int val = src[i] + (1 << (shift - 1));
228
output_pixel(&dest[i], val);
232
static av_always_inline void
233
yuv2planeX_10_c_template(const int16_t *filter, int filterSize,
234
const int16_t **src, uint16_t *dest, int dstW,
235
int big_endian, int output_bits)
208
238
int shift = 11 + 16 - output_bits;
210
#define output_pixel(pos, val) \
212
if (output_bits == 16) { \
213
AV_WB16(pos, av_clip_uint16(val >> shift)); \
215
AV_WB16(pos, av_clip_uintp2(val >> shift, output_bits)); \
218
if (output_bits == 16) { \
219
AV_WL16(pos, av_clip_uint16(val >> shift)); \
221
AV_WL16(pos, av_clip_uintp2(val >> shift, output_bits)); \
224
240
for (i = 0; i < dstW; i++) {
225
241
int val = 1 << (26-output_bits);
228
for (j = 0; j < lumFilterSize; j++)
229
val += lumSrc[j][i] * lumFilter[j];
244
for (j = 0; j < filterSize; j++)
245
val += src[j][i] * filter[j];
231
247
output_pixel(&dest[i], val);
235
for (i = 0; i < chrDstW; i++) {
236
int u = 1 << (26-output_bits);
237
int v = 1 << (26-output_bits);
240
for (j = 0; j < chrFilterSize; j++) {
241
u += chrUSrc[j][i] * chrFilter[j];
242
v += chrVSrc[j][i] * chrFilter[j];
245
output_pixel(&uDest[i], u);
246
output_pixel(&vDest[i], v);
250
if (CONFIG_SWSCALE_ALPHA && aDest) {
251
for (i = 0; i < dstW; i++) {
252
int val = 1 << (26-output_bits);
255
for (j = 0; j < lumFilterSize; j++)
256
val += alpSrc[j][i] * lumFilter[j];
258
output_pixel(&aDest[i], val);
261
251
#undef output_pixel
264
#define yuv2NBPS(bits, BE_LE, is_be) \
265
static void yuv2yuvX ## bits ## BE_LE ## _c(SwsContext *c, const int16_t *lumFilter, \
266
const int16_t **lumSrc, int lumFilterSize, \
267
const int16_t *chrFilter, const int16_t **chrUSrc, \
268
const int16_t **chrVSrc, \
269
int chrFilterSize, const int16_t **alpSrc, \
270
uint8_t *_dest, uint8_t *_uDest, uint8_t *_vDest, \
271
uint8_t *_aDest, int dstW, int chrDstW) \
273
uint16_t *dest = (uint16_t *) _dest, *uDest = (uint16_t *) _uDest, \
274
*vDest = (uint16_t *) _vDest, *aDest = (uint16_t *) _aDest; \
275
yuv2yuvX16_c_template(lumFilter, lumSrc, lumFilterSize, \
276
chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
278
dest, uDest, vDest, aDest, \
279
dstW, chrDstW, is_be, bits); \
288
static void yuv2yuvX_c(SwsContext *c, const int16_t *lumFilter,
289
const int16_t **lumSrc, int lumFilterSize,
290
const int16_t *chrFilter, const int16_t **chrUSrc,
291
const int16_t **chrVSrc,
292
int chrFilterSize, const int16_t **alpSrc,
293
uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
294
uint8_t *aDest, int dstW, int chrDstW)
253
/*
 * Generate the two per-format plane writers for one bit depth and byte
 * order:
 *   yuv2plane1_<bits><BE_LE>_c  -- forwards to
 *                                  yuv2plane1_<template_size>_c_template
 *   yuv2planeX_<bits><BE_LE>_c  -- forwards to
 *                                  yuv2planeX_<template_size>_c_template
 *
 * The wrappers take int16_t source pointers and a uint8_t destination and
 * cast them to typeX_t and uint16_t respectively before forwarding; is_be
 * and bits are passed on as the template's big_endian and output_bits
 * arguments. The `dither` and `offset` parameters are accepted but not
 * forwarded to the templates.
 *
 * Instantiated below for 9- and 10-bit output (10-bit template, int16_t
 * intermediates) and 16-bit output (16-bit template, int32_t
 * intermediates), each in BE and LE variants.
 *
 * NOTE(review): the braces of the generated function bodies are not
 * visible in this excerpt; confirm against the complete file.
 */
#define yuv2NBPS(bits, BE_LE, is_be, template_size, typeX_t) \
254
static void yuv2plane1_ ## bits ## BE_LE ## _c(const int16_t *src, \
255
uint8_t *dest, int dstW, \
256
const uint8_t *dither, int offset)\
258
yuv2plane1_ ## template_size ## _c_template((const typeX_t *) src, \
259
(uint16_t *) dest, dstW, is_be, bits); \
261
static void yuv2planeX_ ## bits ## BE_LE ## _c(const int16_t *filter, int filterSize, \
262
const int16_t **src, uint8_t *dest, int dstW, \
263
const uint8_t *dither, int offset)\
265
yuv2planeX_## template_size ## _c_template(filter, \
266
filterSize, (const typeX_t **) src, \
267
(uint16_t *) dest, dstW, is_be, bits); \
269
yuv2NBPS( 9, BE, 1, 10, int16_t)
270
yuv2NBPS( 9, LE, 0, 10, int16_t)
271
yuv2NBPS(10, BE, 1, 10, int16_t)
272
yuv2NBPS(10, LE, 0, 10, int16_t)
273
yuv2NBPS(16, BE, 1, 16, int32_t)
274
yuv2NBPS(16, LE, 0, 16, int32_t)
276
static void yuv2planeX_8_c(const int16_t *filter, int filterSize,
277
const int16_t **src, uint8_t *dest, int dstW,
278
const uint8_t *dither, int offset)
296
//FIXME Optimize (just quickly written not optimized..)
298
281
for (i=0; i<dstW; i++) {
282
int val = dither[(i + offset) & 7] << 12;
301
for (j=0; j<lumFilterSize; j++)
302
val += lumSrc[j][i] * lumFilter[j];
284
for (j=0; j<filterSize; j++)
285
val += src[j][i] * filter[j];
304
287
dest[i]= av_clip_uint8(val>>19);
308
for (i=0; i<chrDstW; i++) {
312
for (j=0; j<chrFilterSize; j++) {
313
u += chrUSrc[j][i] * chrFilter[j];
314
v += chrVSrc[j][i] * chrFilter[j];
317
uDest[i]= av_clip_uint8(u>>19);
318
vDest[i]= av_clip_uint8(v>>19);
321
if (CONFIG_SWSCALE_ALPHA && aDest)
322
for (i=0; i<dstW; i++) {
325
for (j=0; j<lumFilterSize; j++)
326
val += alpSrc[j][i] * lumFilter[j];
328
aDest[i]= av_clip_uint8(val>>19);
332
static void yuv2yuv1_c(SwsContext *c, const int16_t *lumSrc,
333
const int16_t *chrUSrc, const int16_t *chrVSrc,
334
const int16_t *alpSrc,
335
uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
336
uint8_t *aDest, int dstW, int chrDstW)
291
static void yuv2plane1_8_c(const int16_t *src, uint8_t *dest, int dstW,
292
const uint8_t *dither, int offset)
339
295
for (i=0; i<dstW; i++) {
340
int val= (lumSrc[i]+64)>>7;
296
int val = (src[i] + dither[(i + offset) & 7]) >> 7;
341
297
dest[i]= av_clip_uint8(val);
345
for (i=0; i<chrDstW; i++) {
346
int u=(chrUSrc[i]+64)>>7;
347
int v=(chrVSrc[i]+64)>>7;
348
uDest[i]= av_clip_uint8(u);
349
vDest[i]= av_clip_uint8(v);
352
if (CONFIG_SWSCALE_ALPHA && aDest)
353
for (i=0; i<dstW; i++) {
354
int val= (alpSrc[i]+64)>>7;
355
aDest[i]= av_clip_uint8(val);
359
static void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
360
const int16_t **lumSrc, int lumFilterSize,
361
const int16_t *chrFilter, const int16_t **chrUSrc,
362
const int16_t **chrVSrc, int chrFilterSize,
363
const int16_t **alpSrc, uint8_t *dest, uint8_t *uDest,
364
uint8_t *vDest, uint8_t *aDest,
365
int dstW, int chrDstW)
301
static void yuv2nv12cX_c(SwsContext *c, const int16_t *chrFilter, int chrFilterSize,
302
const int16_t **chrUSrc, const int16_t **chrVSrc,
303
uint8_t *dest, int chrDstW)
367
305
enum PixelFormat dstFormat = c->dstFormat;
369
//FIXME Optimize (just quickly written not optimized..)
306
const uint8_t *chrDither = c->chrDither8;
371
for (i=0; i<dstW; i++) {
374
for (j=0; j<lumFilterSize; j++)
375
val += lumSrc[j][i] * lumFilter[j];
377
dest[i]= av_clip_uint8(val>>19);
383
309
if (dstFormat == PIX_FMT_NV12)
384
310
for (i=0; i<chrDstW; i++) {
311
int u = chrDither[i & 7] << 12;
312
int v = chrDither[(i + 3) & 7] << 12;
388
314
for (j=0; j<chrFilterSize; j++) {
389
315
u += chrUSrc[j][i] * chrFilter[j];
390
316
v += chrVSrc[j][i] * chrFilter[j];
393
uDest[2*i]= av_clip_uint8(u>>19);
394
uDest[2*i+1]= av_clip_uint8(v>>19);
319
dest[2*i]= av_clip_uint8(u>>19);
320
dest[2*i+1]= av_clip_uint8(v>>19);
397
323
for (i=0; i<chrDstW; i++) {
324
int u = chrDither[i & 7] << 12;
325
int v = chrDither[(i + 3) & 7] << 12;
401
327
for (j=0; j<chrFilterSize; j++) {
402
328
u += chrUSrc[j][i] * chrFilter[j];
403
329
v += chrVSrc[j][i] * chrFilter[j];
406
uDest[2*i]= av_clip_uint8(v>>19);
407
uDest[2*i+1]= av_clip_uint8(u>>19);
332
dest[2*i]= av_clip_uint8(v>>19);
333
dest[2*i+1]= av_clip_uint8(u>>19);
418
344
static av_always_inline void
419
345
yuv2gray16_X_c_template(SwsContext *c, const int16_t *lumFilter,
420
const int16_t **lumSrc, int lumFilterSize,
421
const int16_t *chrFilter, const int16_t **chrUSrc,
422
const int16_t **chrVSrc, int chrFilterSize,
423
const int16_t **alpSrc, uint8_t *dest, int dstW,
346
const int32_t **lumSrc, int lumFilterSize,
347
const int16_t *chrFilter, const int32_t **chrUSrc,
348
const int32_t **chrVSrc, int chrFilterSize,
349
const int32_t **alpSrc, uint16_t *dest, int dstW,
424
350
int y, enum PixelFormat target)
428
354
for (i = 0; i < (dstW >> 1); i++) {
432
const int i2 = 2 * i;
356
int Y1 = (1 << 14) - 0x40000000;
357
int Y2 = (1 << 14) - 0x40000000;
434
359
for (j = 0; j < lumFilterSize; j++) {
435
Y1 += lumSrc[j][i2] * lumFilter[j];
436
Y2 += lumSrc[j][i2+1] * lumFilter[j];
440
if ((Y1 | Y2) & 0x10000) {
441
Y1 = av_clip_uint16(Y1);
442
Y2 = av_clip_uint16(Y2);
444
output_pixel(&dest[2 * i2 + 0], Y1);
445
output_pixel(&dest[2 * i2 + 2], Y2);
360
Y1 += lumSrc[j][i * 2] * lumFilter[j];
361
Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
365
Y1 = av_clip_int16(Y1);
366
Y2 = av_clip_int16(Y2);
367
output_pixel(&dest[i * 2 + 0], 0x8000 + Y1);
368
output_pixel(&dest[i * 2 + 1], 0x8000 + Y2);
449
372
static av_always_inline void
450
yuv2gray16_2_c_template(SwsContext *c, const uint16_t *buf0,
451
const uint16_t *buf1, const uint16_t *ubuf0,
452
const uint16_t *ubuf1, const uint16_t *vbuf0,
453
const uint16_t *vbuf1, const uint16_t *abuf0,
454
const uint16_t *abuf1, uint8_t *dest, int dstW,
373
yuv2gray16_2_c_template(SwsContext *c, const int32_t *buf[2],
374
const int32_t *ubuf[2], const int32_t *vbuf[2],
375
const int32_t *abuf[2], uint16_t *dest, int dstW,
455
376
int yalpha, int uvalpha, int y,
456
377
enum PixelFormat target)
458
int yalpha1 = 4095 - yalpha; \
379
int yalpha1 = 4095 - yalpha;
381
const int32_t *buf0 = buf[0], *buf1 = buf[1];
461
383
for (i = 0; i < (dstW >> 1); i++) {
462
const int i2 = 2 * i;
463
int Y1 = (buf0[i2 ] * yalpha1 + buf1[i2 ] * yalpha) >> 11;
464
int Y2 = (buf0[i2+1] * yalpha1 + buf1[i2+1] * yalpha) >> 11;
384
int Y1 = (buf0[i * 2 ] * yalpha1 + buf1[i * 2 ] * yalpha) >> 15;
385
int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 15;
466
output_pixel(&dest[2 * i2 + 0], Y1);
467
output_pixel(&dest[2 * i2 + 2], Y2);
387
output_pixel(&dest[i * 2 + 0], Y1);
388
output_pixel(&dest[i * 2 + 1], Y2);
471
392
static av_always_inline void
472
yuv2gray16_1_c_template(SwsContext *c, const uint16_t *buf0,
473
const uint16_t *ubuf0, const uint16_t *ubuf1,
474
const uint16_t *vbuf0, const uint16_t *vbuf1,
475
const uint16_t *abuf0, uint8_t *dest, int dstW,
476
int uvalpha, enum PixelFormat dstFormat,
477
int flags, int y, enum PixelFormat target)
393
yuv2gray16_1_c_template(SwsContext *c, const int32_t *buf0,
394
const int32_t *ubuf[2], const int32_t *vbuf[2],
395
const int32_t *abuf0, uint16_t *dest, int dstW,
396
int uvalpha, int y, enum PixelFormat target)
481
400
for (i = 0; i < (dstW >> 1); i++) {
482
const int i2 = 2 * i;
483
int Y1 = buf0[i2 ] << 1;
484
int Y2 = buf0[i2+1] << 1;
401
int Y1 = buf0[i * 2 ] << 1;
402
int Y2 = buf0[i * 2 + 1] << 1;
486
output_pixel(&dest[2 * i2 + 0], Y1);
487
output_pixel(&dest[2 * i2 + 2], Y2);
404
output_pixel(&dest[i * 2 + 0], Y1);
405
output_pixel(&dest[i * 2 + 1], Y2);
491
409
#undef output_pixel
493
#define YUV2PACKEDWRAPPER(name, base, ext, fmt) \
411
#define YUV2PACKED16WRAPPER(name, base, ext, fmt) \
494
412
static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
495
const int16_t **lumSrc, int lumFilterSize, \
496
const int16_t *chrFilter, const int16_t **chrUSrc, \
497
const int16_t **chrVSrc, int chrFilterSize, \
498
const int16_t **alpSrc, uint8_t *dest, int dstW, \
413
const int16_t **_lumSrc, int lumFilterSize, \
414
const int16_t *chrFilter, const int16_t **_chrUSrc, \
415
const int16_t **_chrVSrc, int chrFilterSize, \
416
const int16_t **_alpSrc, uint8_t *_dest, int dstW, \
419
const int32_t **lumSrc = (const int32_t **) _lumSrc, \
420
**chrUSrc = (const int32_t **) _chrUSrc, \
421
**chrVSrc = (const int32_t **) _chrVSrc, \
422
**alpSrc = (const int32_t **) _alpSrc; \
423
uint16_t *dest = (uint16_t *) _dest; \
501
424
name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
502
425
chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
503
426
alpSrc, dest, dstW, y, fmt); \
506
static void name ## ext ## _2_c(SwsContext *c, const uint16_t *buf0, \
507
const uint16_t *buf1, const uint16_t *ubuf0, \
508
const uint16_t *ubuf1, const uint16_t *vbuf0, \
509
const uint16_t *vbuf1, const uint16_t *abuf0, \
510
const uint16_t *abuf1, uint8_t *dest, int dstW, \
429
static void name ## ext ## _2_c(SwsContext *c, const int16_t *_buf[2], \
430
const int16_t *_ubuf[2], const int16_t *_vbuf[2], \
431
const int16_t *_abuf[2], uint8_t *_dest, int dstW, \
511
432
int yalpha, int uvalpha, int y) \
513
name ## base ## _2_c_template(c, buf0, buf1, ubuf0, ubuf1, \
514
vbuf0, vbuf1, abuf0, abuf1, \
434
const int32_t **buf = (const int32_t **) _buf, \
435
**ubuf = (const int32_t **) _ubuf, \
436
**vbuf = (const int32_t **) _vbuf, \
437
**abuf = (const int32_t **) _abuf; \
438
uint16_t *dest = (uint16_t *) _dest; \
439
name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
515
440
dest, dstW, yalpha, uvalpha, y, fmt); \
518
static void name ## ext ## _1_c(SwsContext *c, const uint16_t *buf0, \
519
const uint16_t *ubuf0, const uint16_t *ubuf1, \
520
const uint16_t *vbuf0, const uint16_t *vbuf1, \
521
const uint16_t *abuf0, uint8_t *dest, int dstW, \
522
int uvalpha, enum PixelFormat dstFormat, \
443
static void name ## ext ## _1_c(SwsContext *c, const int16_t *_buf0, \
444
const int16_t *_ubuf[2], const int16_t *_vbuf[2], \
445
const int16_t *_abuf0, uint8_t *_dest, int dstW, \
446
int uvalpha, int y) \
525
name ## base ## _1_c_template(c, buf0, ubuf0, ubuf1, vbuf0, \
526
vbuf1, abuf0, dest, dstW, uvalpha, \
527
dstFormat, flags, y, fmt); \
448
const int32_t *buf0 = (const int32_t *) _buf0, \
449
**ubuf = (const int32_t **) _ubuf, \
450
**vbuf = (const int32_t **) _vbuf, \
451
*abuf0 = (const int32_t *) _abuf0; \
452
uint16_t *dest = (uint16_t *) _dest; \
453
name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \
454
dstW, uvalpha, y, fmt); \
530
YUV2PACKEDWRAPPER(yuv2gray16,, LE, PIX_FMT_GRAY16LE);
531
YUV2PACKEDWRAPPER(yuv2gray16,, BE, PIX_FMT_GRAY16BE);
457
YUV2PACKED16WRAPPER(yuv2gray16,, LE, PIX_FMT_GRAY16LE)
458
YUV2PACKED16WRAPPER(yuv2gray16,, BE, PIX_FMT_GRAY16BE)
533
460
#define output_pixel(pos, acc) \
534
461
if (target == PIX_FMT_MONOBLACK) { \
770
733
U += chrUSrc[j][i] * chrFilter[j];
771
734
V += chrVSrc[j][i] * chrFilter[j];
777
if ((Y1 | Y2 | U | V) & 0x100) {
778
Y1 = av_clip_uint8(Y1);
779
Y2 = av_clip_uint8(Y2);
780
U = av_clip_uint8(U);
781
V = av_clip_uint8(V);
784
/* FIXME fix tables so that clipping is not needed and then use _NOCLIP*/
785
r = (const uint8_t *) c->table_rV[V];
786
g = (const uint8_t *)(c->table_gU[U] + c->table_gV[V]);
787
b = (const uint8_t *) c->table_bU[U];
789
dest[ 0] = dest[ 1] = r_b[Y1];
790
dest[ 2] = dest[ 3] = g[Y1];
791
dest[ 4] = dest[ 5] = b_r[Y1];
792
dest[ 6] = dest[ 7] = r_b[Y2];
793
dest[ 8] = dest[ 9] = g[Y2];
794
dest[10] = dest[11] = b_r[Y2];
737
// 8bit: 12+15=27; 16-bit: 12+19=31
745
// 8bit: 27 -> 17bit, 16bit: 31 - 14 = 17bit
746
Y1 -= c->yuv2rgb_y_offset;
747
Y2 -= c->yuv2rgb_y_offset;
748
Y1 *= c->yuv2rgb_y_coeff;
749
Y2 *= c->yuv2rgb_y_coeff;
752
// 8bit: 17 + 13bit = 30bit, 16bit: 17 + 13bit = 30bit
754
R = V * c->yuv2rgb_v2r_coeff;
755
G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
756
B = U * c->yuv2rgb_u2b_coeff;
758
// 8bit: 30 - 22 = 8bit, 16bit: 30bit - 14 = 16bit
759
output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
760
output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
761
output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
762
output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
763
output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
764
output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
799
769
static av_always_inline void
800
yuv2rgb48_2_c_template(SwsContext *c, const uint16_t *buf0,
801
const uint16_t *buf1, const uint16_t *ubuf0,
802
const uint16_t *ubuf1, const uint16_t *vbuf0,
803
const uint16_t *vbuf1, const uint16_t *abuf0,
804
const uint16_t *abuf1, uint8_t *dest, int dstW,
770
yuv2rgb48_2_c_template(SwsContext *c, const int32_t *buf[2],
771
const int32_t *ubuf[2], const int32_t *vbuf[2],
772
const int32_t *abuf[2], uint16_t *dest, int dstW,
805
773
int yalpha, int uvalpha, int y,
806
774
enum PixelFormat target)
776
const int32_t *buf0 = buf[0], *buf1 = buf[1],
777
*ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
778
*vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
779
int yalpha1 = 4095 - yalpha;
780
int uvalpha1 = 4095 - uvalpha;
783
for (i = 0; i < (dstW >> 1); i++) {
784
int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 14;
785
int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 14;
786
int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha + (-128 << 23)) >> 14;
787
int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha + (-128 << 23)) >> 14;
790
Y1 -= c->yuv2rgb_y_offset;
791
Y2 -= c->yuv2rgb_y_offset;
792
Y1 *= c->yuv2rgb_y_coeff;
793
Y2 *= c->yuv2rgb_y_coeff;
797
R = V * c->yuv2rgb_v2r_coeff;
798
G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
799
B = U * c->yuv2rgb_u2b_coeff;
801
output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
802
output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
803
output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
804
output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
805
output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
806
output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
811
static av_always_inline void
812
yuv2rgb48_1_c_template(SwsContext *c, const int32_t *buf0,
813
const int32_t *ubuf[2], const int32_t *vbuf[2],
814
const int32_t *abuf0, uint16_t *dest, int dstW,
815
int uvalpha, int y, enum PixelFormat target)
817
const int32_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
818
*vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
821
if (uvalpha < 2048) {
822
for (i = 0; i < (dstW >> 1); i++) {
823
int Y1 = (buf0[i * 2] ) >> 2;
824
int Y2 = (buf0[i * 2 + 1]) >> 2;
825
int U = (ubuf0[i] + (-128 << 11)) >> 2;
826
int V = (vbuf0[i] + (-128 << 11)) >> 2;
829
Y1 -= c->yuv2rgb_y_offset;
830
Y2 -= c->yuv2rgb_y_offset;
831
Y1 *= c->yuv2rgb_y_coeff;
832
Y2 *= c->yuv2rgb_y_coeff;
836
R = V * c->yuv2rgb_v2r_coeff;
837
G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
838
B = U * c->yuv2rgb_u2b_coeff;
840
output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
841
output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
842
output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
843
output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
844
output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
845
output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
849
for (i = 0; i < (dstW >> 1); i++) {
850
int Y1 = (buf0[i * 2] ) >> 2;
851
int Y2 = (buf0[i * 2 + 1]) >> 2;
852
int U = (ubuf0[i] + ubuf1[i] + (-128 << 11)) >> 3;
853
int V = (vbuf0[i] + vbuf1[i] + (-128 << 11)) >> 3;
856
Y1 -= c->yuv2rgb_y_offset;
857
Y2 -= c->yuv2rgb_y_offset;
858
Y1 *= c->yuv2rgb_y_coeff;
859
Y2 *= c->yuv2rgb_y_coeff;
863
R = V * c->yuv2rgb_v2r_coeff;
864
G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
865
B = U * c->yuv2rgb_u2b_coeff;
867
output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
868
output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
869
output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
870
output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
871
output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
872
output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
882
YUV2PACKED16WRAPPER(yuv2, rgb48, rgb48be, PIX_FMT_RGB48BE)
883
YUV2PACKED16WRAPPER(yuv2, rgb48, rgb48le, PIX_FMT_RGB48LE)
884
YUV2PACKED16WRAPPER(yuv2, rgb48, bgr48be, PIX_FMT_BGR48BE)
885
YUV2PACKED16WRAPPER(yuv2, rgb48, bgr48le, PIX_FMT_BGR48LE)
888
* Write out 2 RGB pixels in the target pixel format. This function takes a
889
* R/G/B LUT as generated by ff_yuv2rgb_c_init_tables(), which takes care of
890
* things like endianness conversion and shifting. The caller takes care of
891
* setting the correct offset in these tables from the chroma (U/V) values.
892
* This function then uses the luminance (Y1/Y2) values to write out the
893
* correct RGB values into the destination buffer.
895
static av_always_inline void
896
yuv2rgb_write(uint8_t *_dest, int i, unsigned Y1, unsigned Y2,
897
unsigned A1, unsigned A2,
898
const void *_r, const void *_g, const void *_b, int y,
899
enum PixelFormat target, int hasAlpha)
901
if (target == PIX_FMT_ARGB || target == PIX_FMT_RGBA ||
902
target == PIX_FMT_ABGR || target == PIX_FMT_BGRA) {
903
uint32_t *dest = (uint32_t *) _dest;
904
const uint32_t *r = (const uint32_t *) _r;
905
const uint32_t *g = (const uint32_t *) _g;
906
const uint32_t *b = (const uint32_t *) _b;
909
int sh = hasAlpha ? ((target == PIX_FMT_RGB32_1 || target == PIX_FMT_BGR32_1) ? 0 : 24) : 0;
911
dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1] + (hasAlpha ? A1 << sh : 0);
912
dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2] + (hasAlpha ? A2 << sh : 0);
915
int sh = (target == PIX_FMT_RGB32_1 || target == PIX_FMT_BGR32_1) ? 0 : 24;
917
dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1] + (A1 << sh);
918
dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2] + (A2 << sh);
920
dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1];
921
dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2];
924
} else if (target == PIX_FMT_RGB24 || target == PIX_FMT_BGR24) {
925
uint8_t *dest = (uint8_t *) _dest;
926
const uint8_t *r = (const uint8_t *) _r;
927
const uint8_t *g = (const uint8_t *) _g;
928
const uint8_t *b = (const uint8_t *) _b;
930
#define r_b ((target == PIX_FMT_RGB24) ? r : b)
931
#define b_r ((target == PIX_FMT_RGB24) ? b : r)
932
dest[i * 6 + 0] = r_b[Y1];
933
dest[i * 6 + 1] = g[Y1];
934
dest[i * 6 + 2] = b_r[Y1];
935
dest[i * 6 + 3] = r_b[Y2];
936
dest[i * 6 + 4] = g[Y2];
937
dest[i * 6 + 5] = b_r[Y2];
940
} else if (target == PIX_FMT_RGB565 || target == PIX_FMT_BGR565 ||
941
target == PIX_FMT_RGB555 || target == PIX_FMT_BGR555 ||
942
target == PIX_FMT_RGB444 || target == PIX_FMT_BGR444) {
943
uint16_t *dest = (uint16_t *) _dest;
944
const uint16_t *r = (const uint16_t *) _r;
945
const uint16_t *g = (const uint16_t *) _g;
946
const uint16_t *b = (const uint16_t *) _b;
947
int dr1, dg1, db1, dr2, dg2, db2;
949
if (target == PIX_FMT_RGB565 || target == PIX_FMT_BGR565) {
950
dr1 = dither_2x2_8[ y & 1 ][0];
951
dg1 = dither_2x2_4[ y & 1 ][0];
952
db1 = dither_2x2_8[(y & 1) ^ 1][0];
953
dr2 = dither_2x2_8[ y & 1 ][1];
954
dg2 = dither_2x2_4[ y & 1 ][1];
955
db2 = dither_2x2_8[(y & 1) ^ 1][1];
956
} else if (target == PIX_FMT_RGB555 || target == PIX_FMT_BGR555) {
957
dr1 = dither_2x2_8[ y & 1 ][0];
958
dg1 = dither_2x2_8[ y & 1 ][1];
959
db1 = dither_2x2_8[(y & 1) ^ 1][0];
960
dr2 = dither_2x2_8[ y & 1 ][1];
961
dg2 = dither_2x2_8[ y & 1 ][0];
962
db2 = dither_2x2_8[(y & 1) ^ 1][1];
964
dr1 = dither_4x4_16[ y & 3 ][0];
965
dg1 = dither_4x4_16[ y & 3 ][1];
966
db1 = dither_4x4_16[(y & 3) ^ 3][0];
967
dr2 = dither_4x4_16[ y & 3 ][1];
968
dg2 = dither_4x4_16[ y & 3 ][0];
969
db2 = dither_4x4_16[(y & 3) ^ 3][1];
972
dest[i * 2 + 0] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1];
973
dest[i * 2 + 1] = r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2];
974
} else /* 8/4-bit */ {
975
uint8_t *dest = (uint8_t *) _dest;
976
const uint8_t *r = (const uint8_t *) _r;
977
const uint8_t *g = (const uint8_t *) _g;
978
const uint8_t *b = (const uint8_t *) _b;
979
int dr1, dg1, db1, dr2, dg2, db2;
981
if (target == PIX_FMT_RGB8 || target == PIX_FMT_BGR8) {
982
const uint8_t * const d64 = dither_8x8_73[y & 7];
983
const uint8_t * const d32 = dither_8x8_32[y & 7];
984
dr1 = dg1 = d32[(i * 2 + 0) & 7];
985
db1 = d64[(i * 2 + 0) & 7];
986
dr2 = dg2 = d32[(i * 2 + 1) & 7];
987
db2 = d64[(i * 2 + 1) & 7];
989
const uint8_t * const d64 = dither_8x8_73 [y & 7];
990
const uint8_t * const d128 = dither_8x8_220[y & 7];
991
dr1 = db1 = d128[(i * 2 + 0) & 7];
992
dg1 = d64[(i * 2 + 0) & 7];
993
dr2 = db2 = d128[(i * 2 + 1) & 7];
994
dg2 = d64[(i * 2 + 1) & 7];
997
if (target == PIX_FMT_RGB4 || target == PIX_FMT_BGR4) {
998
dest[i] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1] +
999
((r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2]) << 4);
1001
dest[i * 2 + 0] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1];
1002
dest[i * 2 + 1] = r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2];
1007
static av_always_inline void
1008
yuv2rgb_X_c_template(SwsContext *c, const int16_t *lumFilter,
1009
const int16_t **lumSrc, int lumFilterSize,
1010
const int16_t *chrFilter, const int16_t **chrUSrc,
1011
const int16_t **chrVSrc, int chrFilterSize,
1012
const int16_t **alpSrc, uint8_t *dest, int dstW,
1013
int y, enum PixelFormat target, int hasAlpha)
1017
for (i = 0; i < (dstW >> 1); i++) {
1023
int av_unused A1, A2;
1024
const void *r, *g, *b;
1026
for (j = 0; j < lumFilterSize; j++) {
1027
Y1 += lumSrc[j][i * 2] * lumFilter[j];
1028
Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
1030
for (j = 0; j < chrFilterSize; j++) {
1031
U += chrUSrc[j][i] * chrFilter[j];
1032
V += chrVSrc[j][i] * chrFilter[j];
1038
if ((Y1 | Y2 | U | V) & 0x100) {
1039
Y1 = av_clip_uint8(Y1);
1040
Y2 = av_clip_uint8(Y2);
1041
U = av_clip_uint8(U);
1042
V = av_clip_uint8(V);
1047
for (j = 0; j < lumFilterSize; j++) {
1048
A1 += alpSrc[j][i * 2 ] * lumFilter[j];
1049
A2 += alpSrc[j][i * 2 + 1] * lumFilter[j];
1053
if ((A1 | A2) & 0x100) {
1054
A1 = av_clip_uint8(A1);
1055
A2 = av_clip_uint8(A2);
1059
/* FIXME fix tables so that clipping is not needed and then use _NOCLIP*/
1061
g = (c->table_gU[U] + c->table_gV[V]);
1064
yuv2rgb_write(dest, i, Y1, Y2, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1065
r, g, b, y, target, hasAlpha);
1069
static av_always_inline void
1070
yuv2rgb_2_c_template(SwsContext *c, const int16_t *buf[2],
1071
const int16_t *ubuf[2], const int16_t *vbuf[2],
1072
const int16_t *abuf[2], uint8_t *dest, int dstW,
1073
int yalpha, int uvalpha, int y,
1074
enum PixelFormat target, int hasAlpha)
1076
const int16_t *buf0 = buf[0], *buf1 = buf[1],
1077
*ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
1078
*vbuf0 = vbuf[0], *vbuf1 = vbuf[1],
1079
*abuf0 = hasAlpha ? abuf[0] : NULL,
1080
*abuf1 = hasAlpha ? abuf[1] : NULL;
808
1081
int yalpha1 = 4095 - yalpha;
809
1082
int uvalpha1 = 4095 - uvalpha;
862
1138
int Y2 = buf0[i * 2 + 1] >> 7;
863
1139
int U = (ubuf0[i] + ubuf1[i]) >> 8;
864
1140
int V = (vbuf0[i] + vbuf1[i]) >> 8;
865
const uint8_t *r = (const uint8_t *) c->table_rV[V],
866
*g = (const uint8_t *)(c->table_gU[U] + c->table_gV[V]),
867
*b = (const uint8_t *) c->table_bU[U];
869
dest[ 0] = dest[ 1] = r_b[Y1];
870
dest[ 2] = dest[ 3] = g[Y1];
871
dest[ 4] = dest[ 5] = b_r[Y1];
872
dest[ 6] = dest[ 7] = r_b[Y2];
873
dest[ 8] = dest[ 9] = g[Y2];
874
dest[10] = dest[11] = b_r[Y2];
883
YUV2PACKEDWRAPPER(yuv2, rgb48, rgb48be, PIX_FMT_RGB48BE);
884
//YUV2PACKEDWRAPPER(yuv2, rgb48, rgb48le, PIX_FMT_RGB48LE);
885
YUV2PACKEDWRAPPER(yuv2, rgb48, bgr48be, PIX_FMT_BGR48BE);
886
//YUV2PACKEDWRAPPER(yuv2, rgb48, bgr48le, PIX_FMT_BGR48LE);
888
#define YSCALE_YUV_2_RGBX_C(type,alpha) \
889
for (i=0; i<(dstW>>1); i++) {\
895
int av_unused A1, A2;\
896
type av_unused *r, *b, *g;\
899
for (j=0; j<lumFilterSize; j++) {\
900
Y1 += lumSrc[j][i2] * lumFilter[j];\
901
Y2 += lumSrc[j][i2+1] * lumFilter[j];\
903
for (j=0; j<chrFilterSize; j++) {\
904
U += chrUSrc[j][i] * chrFilter[j];\
905
V += chrVSrc[j][i] * chrFilter[j];\
911
if ((Y1|Y2|U|V)&0x100) {\
912
Y1 = av_clip_uint8(Y1); \
913
Y2 = av_clip_uint8(Y2); \
914
U = av_clip_uint8(U); \
915
V = av_clip_uint8(V); \
920
for (j=0; j<lumFilterSize; j++) {\
921
A1 += alpSrc[j][i2 ] * lumFilter[j];\
922
A2 += alpSrc[j][i2+1] * lumFilter[j];\
926
if ((A1|A2)&0x100) {\
927
A1 = av_clip_uint8(A1); \
928
A2 = av_clip_uint8(A2); \
931
/* FIXME fix tables so that clipping is not needed and then use _NOCLIP*/\
932
r = (type *)c->table_rV[V]; \
933
g = (type *)(c->table_gU[U] + c->table_gV[V]); \
934
b = (type *)c->table_bU[U];
936
#define YSCALE_YUV_2_RGBX_FULL_C(rnd,alpha) \
937
for (i=0; i<dstW; i++) {\
945
for (j=0; j<lumFilterSize; j++) {\
946
Y += lumSrc[j][i ] * lumFilter[j];\
948
for (j=0; j<chrFilterSize; j++) {\
949
U += chrUSrc[j][i] * chrFilter[j];\
950
V += chrVSrc[j][i] * chrFilter[j];\
957
for (j=0; j<lumFilterSize; j++)\
958
A += alpSrc[j][i ] * lumFilter[j];\
961
A = av_clip_uint8(A);\
963
Y-= c->yuv2rgb_y_offset;\
964
Y*= c->yuv2rgb_y_coeff;\
966
R= Y + V*c->yuv2rgb_v2r_coeff;\
967
G= Y + V*c->yuv2rgb_v2g_coeff + U*c->yuv2rgb_u2g_coeff;\
968
B= Y + U*c->yuv2rgb_u2b_coeff;\
969
if ((R|G|B)&(0xC0000000)) {\
970
R = av_clip_uintp2(R, 30); \
971
G = av_clip_uintp2(G, 30); \
972
B = av_clip_uintp2(B, 30); \
975
#define YSCALE_YUV_2_RGB2_C(type,alpha) \
976
for (i=0; i<(dstW>>1); i++) { \
978
int Y1= (buf0[i2 ]*yalpha1+buf1[i2 ]*yalpha)>>19; \
979
int Y2= (buf0[i2+1]*yalpha1+buf1[i2+1]*yalpha)>>19; \
980
int U= (ubuf0[i]*uvalpha1+ubuf1[i]*uvalpha)>>19; \
981
int V= (vbuf0[i]*uvalpha1+vbuf1[i]*uvalpha)>>19; \
982
type av_unused *r, *b, *g; \
983
int av_unused A1, A2; \
985
A1= (abuf0[i2 ]*yalpha1+abuf1[i2 ]*yalpha)>>19; \
986
A2= (abuf0[i2+1]*yalpha1+abuf1[i2+1]*yalpha)>>19; \
988
r = (type *)c->table_rV[V];\
989
g = (type *)(c->table_gU[U] + c->table_gV[V]);\
990
b = (type *)c->table_bU[U];
992
#define YSCALE_YUV_2_RGB1_C(type,alpha) \
993
for (i=0; i<(dstW>>1); i++) {\
995
int Y1= buf0[i2 ]>>7;\
996
int Y2= buf0[i2+1]>>7;\
997
int U= (ubuf1[i])>>7;\
998
int V= (vbuf1[i])>>7;\
999
type av_unused *r, *b, *g;\
1000
int av_unused A1, A2;\
1003
A2= abuf0[i2+1]>>7;\
1005
r = (type *)c->table_rV[V];\
1006
g = (type *)(c->table_gU[U] + c->table_gV[V]);\
1007
b = (type *)c->table_bU[U];
1009
#define YSCALE_YUV_2_RGB1B_C(type,alpha) \
1010
for (i=0; i<(dstW>>1); i++) {\
1012
int Y1= buf0[i2 ]>>7;\
1013
int Y2= buf0[i2+1]>>7;\
1014
int U= (ubuf0[i] + ubuf1[i])>>8;\
1015
int V= (vbuf0[i] + vbuf1[i])>>8;\
1016
type av_unused *r, *b, *g;\
1017
int av_unused A1, A2;\
1020
A2= abuf0[i2+1]>>7;\
1022
r = (type *)c->table_rV[V];\
1023
g = (type *)(c->table_gU[U] + c->table_gV[V]);\
1024
b = (type *)c->table_bU[U];
1026
#define YSCALE_YUV_2_ANYRGB_C(func)\
1027
switch(c->dstFormat) {\
1030
if (CONFIG_SMALL) {\
1031
int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;\
1032
func(uint32_t,needAlpha)\
1033
((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + (needAlpha ? (A1<<24) : 0);\
1034
((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + (needAlpha ? (A2<<24) : 0);\
1037
if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {\
1039
((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + (A1<<24);\
1040
((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + (A2<<24);\
1044
((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1];\
1045
((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2];\
1052
if (CONFIG_SMALL) {\
1053
int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;\
1054
func(uint32_t,needAlpha)\
1055
((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + (needAlpha ? A1 : 0);\
1056
((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + (needAlpha ? A2 : 0);\
1059
if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {\
1061
((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + A1;\
1062
((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + A2;\
1066
((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1];\
1067
((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2];\
1072
case PIX_FMT_RGB24:\
1074
((uint8_t*)dest)[0]= r[Y1];\
1075
((uint8_t*)dest)[1]= g[Y1];\
1076
((uint8_t*)dest)[2]= b[Y1];\
1077
((uint8_t*)dest)[3]= r[Y2];\
1078
((uint8_t*)dest)[4]= g[Y2];\
1079
((uint8_t*)dest)[5]= b[Y2];\
1083
case PIX_FMT_BGR24:\
1085
((uint8_t*)dest)[0]= b[Y1];\
1086
((uint8_t*)dest)[1]= g[Y1];\
1087
((uint8_t*)dest)[2]= r[Y1];\
1088
((uint8_t*)dest)[3]= b[Y2];\
1089
((uint8_t*)dest)[4]= g[Y2];\
1090
((uint8_t*)dest)[5]= r[Y2];\
1094
case PIX_FMT_RGB565:\
1095
case PIX_FMT_BGR565:\
1097
const int dr1= dither_2x2_8[y&1 ][0];\
1098
const int dg1= dither_2x2_4[y&1 ][0];\
1099
const int db1= dither_2x2_8[(y&1)^1][0];\
1100
const int dr2= dither_2x2_8[y&1 ][1];\
1101
const int dg2= dither_2x2_4[y&1 ][1];\
1102
const int db2= dither_2x2_8[(y&1)^1][1];\
1104
((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
1105
((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
1109
case PIX_FMT_RGB555:\
1110
case PIX_FMT_BGR555:\
1112
const int dr1= dither_2x2_8[y&1 ][0];\
1113
const int dg1= dither_2x2_8[y&1 ][1];\
1114
const int db1= dither_2x2_8[(y&1)^1][0];\
1115
const int dr2= dither_2x2_8[y&1 ][1];\
1116
const int dg2= dither_2x2_8[y&1 ][0];\
1117
const int db2= dither_2x2_8[(y&1)^1][1];\
1119
((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
1120
((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
1124
case PIX_FMT_RGB444:\
1125
case PIX_FMT_BGR444:\
1127
const int dr1= dither_4x4_16[y&3 ][0];\
1128
const int dg1= dither_4x4_16[y&3 ][1];\
1129
const int db1= dither_4x4_16[(y&3)^3][0];\
1130
const int dr2= dither_4x4_16[y&3 ][1];\
1131
const int dg2= dither_4x4_16[y&3 ][0];\
1132
const int db2= dither_4x4_16[(y&3)^3][1];\
1134
((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
1135
((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
1142
const uint8_t * const d64= dither_8x8_73[y&7];\
1143
const uint8_t * const d32= dither_8x8_32[y&7];\
1145
((uint8_t*)dest)[i2+0]= r[Y1+d32[(i2+0)&7]] + g[Y1+d32[(i2+0)&7]] + b[Y1+d64[(i2+0)&7]];\
1146
((uint8_t*)dest)[i2+1]= r[Y2+d32[(i2+1)&7]] + g[Y2+d32[(i2+1)&7]] + b[Y2+d64[(i2+1)&7]];\
1153
const uint8_t * const d64= dither_8x8_73 [y&7];\
1154
const uint8_t * const d128=dither_8x8_220[y&7];\
1156
((uint8_t*)dest)[i]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]]\
1157
+ ((r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]])<<4);\
1161
case PIX_FMT_RGB4_BYTE:\
1162
case PIX_FMT_BGR4_BYTE:\
1164
const uint8_t * const d64= dither_8x8_73 [y&7];\
1165
const uint8_t * const d128=dither_8x8_220[y&7];\
1167
((uint8_t*)dest)[i2+0]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]];\
1168
((uint8_t*)dest)[i2+1]= r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]];\
1174
static void yuv2packedX_c(SwsContext *c, const int16_t *lumFilter,
1142
const void *r = c->table_rV[V],
1143
*g = (c->table_gU[U] + c->table_gV[V]),
1144
*b = c->table_bU[U];
1147
A1 = abuf0[i * 2 ] >> 7;
1148
A2 = abuf0[i * 2 + 1] >> 7;
1151
yuv2rgb_write(dest, i, Y1, Y2, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1152
r, g, b, y, target, hasAlpha);
1157
/*
 * YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha)
 *
 * Generates the function name##ext##_X_c, which forwards all of its
 * arguments to name##base##_X_c_template and appends the two extra
 * arguments fmt and hasAlpha. fmt and hasAlpha are pasted textually, so
 * hasAlpha may be an expression that refers to the wrapper's parameter c.
 *
 * NOTE(review): in this listing the end of the parameter list and the braces
 * of the generated function are not visible; check the complete file.
 */
#define YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha) \
1158
static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
1159
const int16_t **lumSrc, int lumFilterSize, \
1160
const int16_t *chrFilter, const int16_t **chrUSrc, \
1161
const int16_t **chrVSrc, int chrFilterSize, \
1162
const int16_t **alpSrc, uint8_t *dest, int dstW, \
1165
name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
1166
chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
1167
alpSrc, dest, dstW, y, fmt, hasAlpha); \
1169
/*
 * YUV2RGBWRAPPER(name, base, ext, fmt, hasAlpha)
 *
 * Expands YUV2RGBWRAPPERX with the same arguments and additionally generates
 * two more functions:
 *   - name##ext##_2_c, forwarding to name##base##_2_c_template
 *     (takes buf[2], ubuf[2], vbuf[2], abuf[2] plus yalpha and uvalpha);
 *   - name##ext##_1_c, forwarding to name##base##_1_c_template
 *     (takes a single buf0 / abuf0, ubuf[2], vbuf[2] and uvalpha only).
 * Both forwarders append fmt and hasAlpha to the template call.
 *
 * NOTE(review): the braces of the generated functions are not visible in
 * this listing; check the complete file.
 */
#define YUV2RGBWRAPPER(name, base, ext, fmt, hasAlpha) \
1170
YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha) \
1171
static void name ## ext ## _2_c(SwsContext *c, const int16_t *buf[2], \
1172
const int16_t *ubuf[2], const int16_t *vbuf[2], \
1173
const int16_t *abuf[2], uint8_t *dest, int dstW, \
1174
int yalpha, int uvalpha, int y) \
1176
name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
1177
dest, dstW, yalpha, uvalpha, y, fmt, hasAlpha); \
1180
static void name ## ext ## _1_c(SwsContext *c, const int16_t *buf0, \
1181
const int16_t *ubuf[2], const int16_t *vbuf[2], \
1182
const int16_t *abuf0, uint8_t *dest, int dstW, \
1183
int uvalpha, int y) \
1185
name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \
1186
dstW, uvalpha, y, fmt, hasAlpha); \
1190
YUV2RGBWRAPPER(yuv2rgb,, 32_1, PIX_FMT_RGB32_1, CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
1191
YUV2RGBWRAPPER(yuv2rgb,, 32, PIX_FMT_RGB32, CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
1193
#if CONFIG_SWSCALE_ALPHA
1194
YUV2RGBWRAPPER(yuv2rgb,, a32_1, PIX_FMT_RGB32_1, 1)
1195
YUV2RGBWRAPPER(yuv2rgb,, a32, PIX_FMT_RGB32, 1)
1197
YUV2RGBWRAPPER(yuv2rgb,, x32_1, PIX_FMT_RGB32_1, 0)
1198
YUV2RGBWRAPPER(yuv2rgb,, x32, PIX_FMT_RGB32, 0)
1200
YUV2RGBWRAPPER(yuv2, rgb, rgb24, PIX_FMT_RGB24, 0)
1201
YUV2RGBWRAPPER(yuv2, rgb, bgr24, PIX_FMT_BGR24, 0)
1202
YUV2RGBWRAPPER(yuv2rgb,, 16, PIX_FMT_RGB565, 0)
1203
YUV2RGBWRAPPER(yuv2rgb,, 15, PIX_FMT_RGB555, 0)
1204
YUV2RGBWRAPPER(yuv2rgb,, 12, PIX_FMT_RGB444, 0)
1205
YUV2RGBWRAPPER(yuv2rgb,, 8, PIX_FMT_RGB8, 0)
1206
YUV2RGBWRAPPER(yuv2rgb,, 4, PIX_FMT_RGB4, 0)
1207
YUV2RGBWRAPPER(yuv2rgb,, 4b, PIX_FMT_RGB4_BYTE, 0)
1209
static av_always_inline void
1210
yuv2rgb_full_X_c_template(SwsContext *c, const int16_t *lumFilter,
1175
1211
const int16_t **lumSrc, int lumFilterSize,
1176
1212
const int16_t *chrFilter, const int16_t **chrUSrc,
1177
1213
const int16_t **chrVSrc, int chrFilterSize,
1178
const int16_t **alpSrc, uint8_t *dest, int dstW, int y)
1181
YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGBX_C)
1184
static void yuv2rgbX_c_full(SwsContext *c, const int16_t *lumFilter,
1185
const int16_t **lumSrc, int lumFilterSize,
1186
const int16_t *chrFilter, const int16_t **chrUSrc,
1187
const int16_t **chrVSrc, int chrFilterSize,
1188
const int16_t **alpSrc, uint8_t *dest, int dstW, int y)
1191
int step= c->dstFormatBpp/8;
1194
switch(c->dstFormat) {
1202
int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;
1203
YSCALE_YUV_2_RGBX_FULL_C(1<<21, needAlpha)
1204
dest[aidx]= needAlpha ? A : 255;
1211
if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
1212
YSCALE_YUV_2_RGBX_FULL_C(1<<21, 1)
1220
YSCALE_YUV_2_RGBX_FULL_C(1<<21, 0)
1237
int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;
1238
YSCALE_YUV_2_RGBX_FULL_C(1<<21, needAlpha)
1239
dest[aidx]= needAlpha ? A : 255;
1246
if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
1247
YSCALE_YUV_2_RGBX_FULL_C(1<<21, 1)
1255
YSCALE_YUV_2_RGBX_FULL_C(1<<21, 0)
1271
* vertical bilinear scale YV12 to RGB
1273
/*
 * Packed output from two source lines per plane (buf0/buf1, ubuf0/ubuf1,
 * vbuf0/vbuf1, abuf0/abuf1). The per-pixel work is produced by expanding
 * YSCALE_YUV_2_ANYRGB_C with YSCALE_YUV_2_RGB2_C.
 *
 * yalpha and uvalpha are the weights for the second line of each pair; the
 * complementary weights for the first line are derived below.
 *
 * NOTE(review): the function braces and any further locals are not visible
 * in this listing; check the complete file.
 */
static void yuv2packed2_c(SwsContext *c, const uint16_t *buf0,
1274
const uint16_t *buf1, const uint16_t *ubuf0,
1275
const uint16_t *ubuf1, const uint16_t *vbuf0,
1276
const uint16_t *vbuf1, const uint16_t *abuf0,
1277
const uint16_t *abuf1, uint8_t *dest, int dstW,
1278
int yalpha, int uvalpha, int y)
1280
/* complementary weights: yalpha + yalpha1 == 4095, same for uvalpha */
int yalpha1=4095- yalpha;
1281
int uvalpha1=4095-uvalpha;
1284
YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB2_C)
1288
* YV12 to RGB without scaling or interpolating
1290
static void yuv2packed1_c(SwsContext *c, const uint16_t *buf0,
1291
const uint16_t *ubuf0, const uint16_t *ubuf1,
1292
const uint16_t *vbuf0, const uint16_t *vbuf1,
1293
const uint16_t *abuf0, uint8_t *dest, int dstW,
1294
int uvalpha, enum PixelFormat dstFormat,
1299
if (uvalpha < 2048) {
1300
YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1_C)
1302
YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1B_C)
1214
const int16_t **alpSrc, uint8_t *dest,
1215
int dstW, int y, enum PixelFormat target, int hasAlpha)
1218
int step = (target == PIX_FMT_RGB24 || target == PIX_FMT_BGR24) ? 3 : 4;
1220
for (i = 0; i < dstW; i++) {
1228
for (j = 0; j < lumFilterSize; j++) {
1229
Y += lumSrc[j][i] * lumFilter[j];
1231
for (j = 0; j < chrFilterSize; j++) {
1232
U += chrUSrc[j][i] * chrFilter[j];
1233
V += chrVSrc[j][i] * chrFilter[j];
1240
for (j = 0; j < lumFilterSize; j++) {
1241
A += alpSrc[j][i] * lumFilter[j];
1245
A = av_clip_uint8(A);
1247
Y -= c->yuv2rgb_y_offset;
1248
Y *= c->yuv2rgb_y_coeff;
1250
R = Y + V*c->yuv2rgb_v2r_coeff;
1251
G = Y + V*c->yuv2rgb_v2g_coeff + U*c->yuv2rgb_u2g_coeff;
1252
B = Y + U*c->yuv2rgb_u2b_coeff;
1253
if ((R | G | B) & 0xC0000000) {
1254
R = av_clip_uintp2(R, 30);
1255
G = av_clip_uintp2(G, 30);
1256
B = av_clip_uintp2(B, 30);
1261
dest[0] = hasAlpha ? A : 255;
1275
dest[3] = hasAlpha ? A : 255;
1278
dest[0] = hasAlpha ? A : 255;
1293
dest[3] = hasAlpha ? A : 255;
1301
YUV2RGBWRAPPERX(yuv2, rgb_full, bgra32_full, PIX_FMT_BGRA, CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
1302
YUV2RGBWRAPPERX(yuv2, rgb_full, abgr32_full, PIX_FMT_ABGR, CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
1303
YUV2RGBWRAPPERX(yuv2, rgb_full, rgba32_full, PIX_FMT_RGBA, CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
1304
YUV2RGBWRAPPERX(yuv2, rgb_full, argb32_full, PIX_FMT_ARGB, CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
1306
#if CONFIG_SWSCALE_ALPHA
1307
YUV2RGBWRAPPERX(yuv2, rgb_full, bgra32_full, PIX_FMT_BGRA, 1)
1308
YUV2RGBWRAPPERX(yuv2, rgb_full, abgr32_full, PIX_FMT_ABGR, 1)
1309
YUV2RGBWRAPPERX(yuv2, rgb_full, rgba32_full, PIX_FMT_RGBA, 1)
1310
YUV2RGBWRAPPERX(yuv2, rgb_full, argb32_full, PIX_FMT_ARGB, 1)
1312
YUV2RGBWRAPPERX(yuv2, rgb_full, bgrx32_full, PIX_FMT_BGRA, 0)
1313
YUV2RGBWRAPPERX(yuv2, rgb_full, xbgr32_full, PIX_FMT_ABGR, 0)
1314
YUV2RGBWRAPPERX(yuv2, rgb_full, rgbx32_full, PIX_FMT_RGBA, 0)
1315
YUV2RGBWRAPPERX(yuv2, rgb_full, xrgb32_full, PIX_FMT_ARGB, 0)
1317
YUV2RGBWRAPPERX(yuv2, rgb_full, bgr24_full, PIX_FMT_BGR24, 0)
1318
YUV2RGBWRAPPERX(yuv2, rgb_full, rgb24_full, PIX_FMT_RGB24, 0)
1306
1320
static av_always_inline void fillPlane(uint8_t* plane, int stride,
1307
1321
int width, int height,
1508
1531
maskr, maskg, maskb, rsh, gsh, bsh, S); \
1511
rgb16_32_wrapper(PIX_FMT_BGR32, bgr32, 16, 0, 0, 0, 0xFF0000, 0xFF00, 0x00FF, 8, 0, 8, RGB2YUV_SHIFT+8);
1512
rgb16_32_wrapper(PIX_FMT_BGR32_1, bgr321, 16, 0, 0, 8, 0xFF0000, 0xFF00, 0x00FF, 8, 0, 8, RGB2YUV_SHIFT+8);
1513
rgb16_32_wrapper(PIX_FMT_RGB32, rgb32, 0, 0, 16, 0, 0x00FF, 0xFF00, 0xFF0000, 8, 0, 8, RGB2YUV_SHIFT+8);
1514
rgb16_32_wrapper(PIX_FMT_RGB32_1, rgb321, 0, 0, 16, 8, 0x00FF, 0xFF00, 0xFF0000, 8, 0, 8, RGB2YUV_SHIFT+8);
1515
rgb16_32_wrapper(PIX_FMT_BGR565LE, bgr16le, 0, 0, 0, 0, 0x001F, 0x07E0, 0xF800, 11, 5, 0, RGB2YUV_SHIFT+8);
1516
rgb16_32_wrapper(PIX_FMT_BGR555LE, bgr15le, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, 10, 5, 0, RGB2YUV_SHIFT+7);
1517
rgb16_32_wrapper(PIX_FMT_RGB565LE, rgb16le, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, 0, 5, 11, RGB2YUV_SHIFT+8);
1518
rgb16_32_wrapper(PIX_FMT_RGB555LE, rgb15le, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, 0, 5, 10, RGB2YUV_SHIFT+7);
1519
rgb16_32_wrapper(PIX_FMT_BGR565BE, bgr16be, 0, 0, 0, 0, 0x001F, 0x07E0, 0xF800, 11, 5, 0, RGB2YUV_SHIFT+8);
1520
rgb16_32_wrapper(PIX_FMT_BGR555BE, bgr15be, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, 10, 5, 0, RGB2YUV_SHIFT+7);
1521
rgb16_32_wrapper(PIX_FMT_RGB565BE, rgb16be, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, 0, 5, 11, RGB2YUV_SHIFT+8);
1522
rgb16_32_wrapper(PIX_FMT_RGB555BE, rgb15be, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, 0, 5, 10, RGB2YUV_SHIFT+7);
1534
rgb16_32_wrapper(PIX_FMT_BGR32, bgr32, 16, 0, 0, 0, 0xFF0000, 0xFF00, 0x00FF, 8, 0, 8, RGB2YUV_SHIFT+8)
1535
rgb16_32_wrapper(PIX_FMT_BGR32_1, bgr321, 16, 0, 0, 8, 0xFF0000, 0xFF00, 0x00FF, 8, 0, 8, RGB2YUV_SHIFT+8)
1536
rgb16_32_wrapper(PIX_FMT_RGB32, rgb32, 0, 0, 16, 0, 0x00FF, 0xFF00, 0xFF0000, 8, 0, 8, RGB2YUV_SHIFT+8)
1537
rgb16_32_wrapper(PIX_FMT_RGB32_1, rgb321, 0, 0, 16, 8, 0x00FF, 0xFF00, 0xFF0000, 8, 0, 8, RGB2YUV_SHIFT+8)
1538
rgb16_32_wrapper(PIX_FMT_BGR565LE, bgr16le, 0, 0, 0, 0, 0x001F, 0x07E0, 0xF800, 11, 5, 0, RGB2YUV_SHIFT+8)
1539
rgb16_32_wrapper(PIX_FMT_BGR555LE, bgr15le, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, 10, 5, 0, RGB2YUV_SHIFT+7)
1540
rgb16_32_wrapper(PIX_FMT_BGR444LE, bgr12le, 0, 0, 0, 0, 0x000F, 0x00F0, 0x0F00, 8, 4, 0, RGB2YUV_SHIFT+4)
1541
rgb16_32_wrapper(PIX_FMT_RGB565LE, rgb16le, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, 0, 5, 11, RGB2YUV_SHIFT+8)
1542
rgb16_32_wrapper(PIX_FMT_RGB555LE, rgb15le, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, 0, 5, 10, RGB2YUV_SHIFT+7)
1543
rgb16_32_wrapper(PIX_FMT_RGB444LE, rgb12le, 0, 0, 0, 0, 0x0F00, 0x00F0, 0x000F, 0, 4, 8, RGB2YUV_SHIFT+4)
1544
rgb16_32_wrapper(PIX_FMT_BGR565BE, bgr16be, 0, 0, 0, 0, 0x001F, 0x07E0, 0xF800, 11, 5, 0, RGB2YUV_SHIFT+8)
1545
rgb16_32_wrapper(PIX_FMT_BGR555BE, bgr15be, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, 10, 5, 0, RGB2YUV_SHIFT+7)
1546
rgb16_32_wrapper(PIX_FMT_BGR444BE, bgr12be, 0, 0, 0, 0, 0x000F, 0x00F0, 0x0F00, 8, 4, 0, RGB2YUV_SHIFT+4)
1547
rgb16_32_wrapper(PIX_FMT_RGB565BE, rgb16be, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, 0, 5, 11, RGB2YUV_SHIFT+8)
1548
rgb16_32_wrapper(PIX_FMT_RGB555BE, rgb15be, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, 0, 5, 10, RGB2YUV_SHIFT+7)
1549
rgb16_32_wrapper(PIX_FMT_RGB444BE, rgb12be, 0, 0, 0, 0, 0x0F00, 0x00F0, 0x000F, 0, 4, 8, RGB2YUV_SHIFT+4)
1524
1551
static void abgrToA_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
1790
/*
 * Compute one row of 8-bit luma from planar RGB input.
 *
 * dst    output row, one byte per sample
 * src    array of source plane pointers
 * width  number of samples to convert
 *
 * NOTE(review): the lines that declare i and load r, g and b from src are
 * not visible in this listing, so the plane order cannot be confirmed here.
 */
static void planar_rgb_to_y(uint8_t *dst, const uint8_t *src[4], int width)
1793
for (i = 0; i < width; i++) {
1798
/* 33 << (RGB2YUV_SHIFT - 1) equals 16.5 after the final shift:
 * an offset of 16 plus 0.5 for rounding */
dst[i] = ((RY * r + GY * g + BY * b + (33 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT);
1802
/*
 * Compute one row of luma from planar RGB with 16-bit samples read via
 * AV_RL16 (little-endian).
 *
 * _dst   output row, written as uint16_t samples
 * _src   plane pointers, accessed as uint16_t: [0] = G, [1] = B, [2] = R
 * width  number of samples to convert
 *
 * NOTE(review): the declaration of i and the closing braces are not visible
 * in this listing; check the complete file.
 */
static void planar_rgb16le_to_y(uint8_t *_dst, const uint8_t *_src[4], int width)
1805
const uint16_t **src = (const uint16_t **) _src;
1806
uint16_t *dst = (uint16_t *) _dst;
1807
for (i = 0; i < width; i++) {
1808
int g = AV_RL16(src[0] + i);
1809
int b = AV_RL16(src[1] + i);
1810
int r = AV_RL16(src[2] + i);
1812
/* 33 << (RGB2YUV_SHIFT - 1) equals 16.5 after the final shift.
 * NOTE(review): this bias is not scaled with the sample depth -- confirm
 * that an offset of 16 is intended for 16-bit samples. */
dst[i] = ((RY * r + GY * g + BY * b + (33 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT);
1816
/*
 * Compute one row of luma from planar RGB with 16-bit samples read via
 * AV_RB16 (big-endian).
 *
 * _dst   output row, written as uint16_t samples
 * _src   plane pointers, accessed as uint16_t: [0] = G, [1] = B, [2] = R
 * width  number of samples to convert
 *
 * NOTE(review): the declaration of i and the closing braces are not visible
 * in this listing; check the complete file.
 */
static void planar_rgb16be_to_y(uint8_t *_dst, const uint8_t *_src[4], int width)
1819
const uint16_t **src = (const uint16_t **) _src;
1820
uint16_t *dst = (uint16_t *) _dst;
1821
for (i = 0; i < width; i++) {
1822
int g = AV_RB16(src[0] + i);
1823
int b = AV_RB16(src[1] + i);
1824
int r = AV_RB16(src[2] + i);
1826
/* 33 << (RGB2YUV_SHIFT - 1) equals 16.5 after the final shift.
 * NOTE(review): this bias is not scaled with the sample depth -- confirm
 * that an offset of 16 is intended for 16-bit samples. */
dst[i] = ((RY * r + GY * g + BY * b + (33 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT);
1830
/*
 * Compute one row of 8-bit U and V from planar RGB input.
 *
 * dstU, dstV  output rows, one byte per sample
 * src         array of source plane pointers
 * width       number of samples to convert
 *
 * NOTE(review): the lines that declare i and load r, g and b from src are
 * not visible in this listing, so the plane order cannot be confirmed here.
 */
static void planar_rgb_to_uv(uint8_t *dstU, uint8_t *dstV, const uint8_t *src[4], int width)
1833
for (i = 0; i < width; i++) {
1838
/* 257 << RGB2YUV_SHIFT, shifted right by RGB2YUV_SHIFT + 1, equals 128.5:
 * an offset of 128 plus 0.5 for rounding */
dstU[i] = (RU * r + GU * g + BU * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
1839
dstV[i] = (RV * r + GV * g + BV * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
1843
/*
 * Compute one row of U and V from planar RGB with 16-bit samples read via
 * AV_RL16 (little-endian).
 *
 * _dstU, _dstV  output rows, written as uint16_t samples
 * _src          plane pointers, accessed as uint16_t: [0] = G, [1] = B, [2] = R
 * width         number of samples to convert
 *
 * NOTE(review): the declaration of i and the closing braces are not visible
 * in this listing; check the complete file.
 */
static void planar_rgb16le_to_uv(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *_src[4], int width)
1846
const uint16_t **src = (const uint16_t **) _src;
1847
uint16_t *dstU = (uint16_t *) _dstU;
1848
uint16_t *dstV = (uint16_t *) _dstV;
1849
for (i = 0; i < width; i++) {
1850
int g = AV_RL16(src[0] + i);
1851
int b = AV_RL16(src[1] + i);
1852
int r = AV_RL16(src[2] + i);
1854
/* 257 << RGB2YUV_SHIFT, shifted right by RGB2YUV_SHIFT + 1, equals 128.5.
 * NOTE(review): this bias is not scaled with the sample depth -- confirm
 * that an offset of 128 is intended for 16-bit samples. */
dstU[i] = (RU * r + GU * g + BU * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
1855
dstV[i] = (RV * r + GV * g + BV * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
1859
/*
 * Compute one row of U and V from planar RGB with 16-bit samples read via
 * AV_RB16 (big-endian).
 *
 * _dstU, _dstV  output rows, written as uint16_t samples
 * _src          plane pointers, accessed as uint16_t: [0] = G, [1] = B, [2] = R
 * width         number of samples to convert
 *
 * NOTE(review): the declaration of i and the closing braces are not visible
 * in this listing; check the complete file.
 */
static void planar_rgb16be_to_uv(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *_src[4], int width)
1862
const uint16_t **src = (const uint16_t **) _src;
1863
uint16_t *dstU = (uint16_t *) _dstU;
1864
uint16_t *dstV = (uint16_t *) _dstV;
1865
for (i = 0; i < width; i++) {
1866
int g = AV_RB16(src[0] + i);
1867
int b = AV_RB16(src[1] + i);
1868
int r = AV_RB16(src[2] + i);
1870
/* 257 << RGB2YUV_SHIFT, shifted right by RGB2YUV_SHIFT + 1, equals 128.5.
 * NOTE(review): this bias is not scaled with the sample depth -- confirm
 * that an offset of 128 is intended for 16-bit samples. */
dstU[i] = (RU * r + GU * g + BU * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
1871
dstV[i] = (RV * r + GV * g + BV * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
1875
/*
 * Horizontal filter for 16-bit-per-sample input producing int32_t output.
 *
 * c           context; only c->srcFormat is read here (to look up the depth)
 * _dst        output buffer, written as int32_t despite the int16_t type
 * dstW        number of output samples
 * _src        input row, read as uint16_t
 * filter      coefficients, filterSize per output sample
 * filterPos   index of the first input sample used for each output sample
 * filterSize  number of taps per output sample
 *
 * Each output is the sum of filterSize products, shifted right by sh and
 * clamped from above to (1 << 19) - 1.
 *
 * NOTE(review): the declarations of i, j, val and sh are not visible in this
 * listing; sh is presumably derived from bits -- confirm in the complete file.
 */
static void hScale16To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *_src,
1876
const int16_t *filter,
1877
const int16_t *filterPos, int filterSize)
1880
int32_t *dst = (int32_t *) _dst;
1881
const uint16_t *src = (const uint16_t *) _src;
1882
/* depth of the first component of the source format, minus one */
int bits = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
1885
for (i = 0; i < dstW; i++) {
1887
int srcPos = filterPos[i];
1890
for (j = 0; j < filterSize; j++) {
1891
val += src[srcPos + j] * filter[filterSize * i + j];
1893
// filter=14 bit, input=16 bit, output=30 bit, >> 11 makes 19 bit
1894
dst[i] = FFMIN(val >> sh, (1 << 19) - 1);
1898
/*
 * Horizontal filter for 16-bit-per-sample input producing int16_t output.
 *
 * c           context; only c->srcFormat is read here (to look up the depth)
 * dst         output buffer
 * dstW        number of output samples
 * _src        input row, read as uint16_t
 * filter      coefficients, filterSize per output sample
 * filterPos   index of the first input sample used for each output sample
 * filterSize  number of taps per output sample
 *
 * Each output is the sum of filterSize products, shifted right by sh and
 * clamped from above to (1 << 15) - 1.
 *
 * NOTE(review): the declarations of i, j and val are not visible in this
 * listing; check the complete file.
 */
static void hScale16To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *_src,
1899
const int16_t *filter,
1900
const int16_t *filterPos, int filterSize)
1903
const uint16_t *src = (const uint16_t *) _src;
/* shift amount: depth of the first source component, minus one */
int sh = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
1906
for (i = 0; i < dstW; i++) {
1908
int srcPos = filterPos[i];
1911
for (j = 0; j < filterSize; j++) {
1912
val += src[srcPos + j] * filter[filterSize * i + j];
1914
// filter=14 bit, input=16 bit, output=30 bit, >> 15 makes 15 bit
// (the shift is sh, which is 15 only when the source depth is 16)
1915
dst[i] = FFMIN(val >> sh, (1 << 15) - 1);
1808
1919
// bilinear / bicubic scaling
1809
static void hScale_c(int16_t *dst, int dstW, const uint8_t *src,
1811
const int16_t *filter, const int16_t *filterPos,
1920
static void hScale8To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *src,
1921
const int16_t *filter, const int16_t *filterPos,
1815
1925
for (i=0; i<dstW; i++) {
1937
2111
static av_always_inline void
1938
2112
find_c_packed_planar_out_funcs(SwsContext *c,
1939
yuv2planar1_fn *yuv2yuv1, yuv2planarX_fn *yuv2yuvX,
2113
yuv2planar1_fn *yuv2plane1, yuv2planarX_fn *yuv2planeX,
2114
yuv2interleavedX_fn *yuv2nv12cX,
1940
2115
yuv2packed1_fn *yuv2packed1, yuv2packed2_fn *yuv2packed2,
1941
2116
yuv2packedX_fn *yuv2packedX)
1943
2118
enum PixelFormat dstFormat = c->dstFormat;
1945
if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21) {
1946
*yuv2yuvX = yuv2nv12X_c;
1947
} else if (is16BPS(dstFormat)) {
1948
*yuv2yuvX = isBE(dstFormat) ? yuv2yuvX16BE_c : yuv2yuvX16LE_c;
2120
if (is16BPS(dstFormat)) {
2121
*yuv2planeX = isBE(dstFormat) ? yuv2planeX_16BE_c : yuv2planeX_16LE_c;
2122
*yuv2plane1 = isBE(dstFormat) ? yuv2plane1_16BE_c : yuv2plane1_16LE_c;
1949
2123
} else if (is9_OR_10BPS(dstFormat)) {
1950
2124
if (av_pix_fmt_descriptors[dstFormat].comp[0].depth_minus1 == 8) {
1951
*yuv2yuvX = isBE(dstFormat) ? yuv2yuvX9BE_c : yuv2yuvX9LE_c;
2125
*yuv2planeX = isBE(dstFormat) ? yuv2planeX_9BE_c : yuv2planeX_9LE_c;
2126
*yuv2plane1 = isBE(dstFormat) ? yuv2plane1_9BE_c : yuv2plane1_9LE_c;
1953
*yuv2yuvX = isBE(dstFormat) ? yuv2yuvX10BE_c : yuv2yuvX10LE_c;
2128
*yuv2planeX = isBE(dstFormat) ? yuv2planeX_10BE_c : yuv2planeX_10LE_c;
2129
*yuv2plane1 = isBE(dstFormat) ? yuv2plane1_10BE_c : yuv2plane1_10LE_c;
1956
*yuv2yuv1 = yuv2yuv1_c;
1957
*yuv2yuvX = yuv2yuvX_c;
2132
*yuv2plane1 = yuv2plane1_8_c;
2133
*yuv2planeX = yuv2planeX_8_c;
2134
if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21)
2135
*yuv2nv12cX = yuv2nv12cX_c;
1959
2138
if(c->flags & SWS_FULL_CHR_H_INT) {
1960
*yuv2packedX = yuv2rgbX_c_full;
2139
switch (dstFormat) {
2142
*yuv2packedX = yuv2rgba32_full_X_c;
2144
#if CONFIG_SWSCALE_ALPHA
2146
*yuv2packedX = yuv2rgba32_full_X_c;
2148
#endif /* CONFIG_SWSCALE_ALPHA */
2150
*yuv2packedX = yuv2rgbx32_full_X_c;
2152
#endif /* !CONFIG_SMALL */
2156
*yuv2packedX = yuv2argb32_full_X_c;
2158
#if CONFIG_SWSCALE_ALPHA
2160
*yuv2packedX = yuv2argb32_full_X_c;
2162
#endif /* CONFIG_SWSCALE_ALPHA */
2164
*yuv2packedX = yuv2xrgb32_full_X_c;
2166
#endif /* !CONFIG_SMALL */
2170
*yuv2packedX = yuv2bgra32_full_X_c;
2172
#if CONFIG_SWSCALE_ALPHA
2174
*yuv2packedX = yuv2bgra32_full_X_c;
2176
#endif /* CONFIG_SWSCALE_ALPHA */
2178
*yuv2packedX = yuv2bgrx32_full_X_c;
2180
#endif /* !CONFIG_SMALL */
2184
*yuv2packedX = yuv2abgr32_full_X_c;
2186
#if CONFIG_SWSCALE_ALPHA
2188
*yuv2packedX = yuv2abgr32_full_X_c;
2190
#endif /* CONFIG_SWSCALE_ALPHA */
2192
*yuv2packedX = yuv2xbgr32_full_X_c;
2194
#endif /* !CONFIG_SMALL */
2197
*yuv2packedX = yuv2rgb24_full_X_c;
2200
*yuv2packedX = yuv2bgr24_full_X_c;
1962
2204
switch (dstFormat) {
1963
case PIX_FMT_GRAY16BE:
1964
*yuv2packed1 = yuv2gray16BE_1_c;
1965
*yuv2packed2 = yuv2gray16BE_2_c;
1966
*yuv2packedX = yuv2gray16BE_X_c;
1968
case PIX_FMT_GRAY16LE:
1969
*yuv2packed1 = yuv2gray16LE_1_c;
1970
*yuv2packed2 = yuv2gray16LE_2_c;
1971
*yuv2packedX = yuv2gray16LE_X_c;
1973
case PIX_FMT_MONOWHITE:
1974
*yuv2packed1 = yuv2monowhite_1_c;
1975
*yuv2packed2 = yuv2monowhite_2_c;
1976
*yuv2packedX = yuv2monowhite_X_c;
1978
case PIX_FMT_MONOBLACK:
1979
*yuv2packed1 = yuv2monoblack_1_c;
1980
*yuv2packed2 = yuv2monoblack_2_c;
1981
*yuv2packedX = yuv2monoblack_X_c;
1983
case PIX_FMT_YUYV422:
1984
*yuv2packed1 = yuv2yuyv422_1_c;
1985
*yuv2packed2 = yuv2yuyv422_2_c;
1986
*yuv2packedX = yuv2yuyv422_X_c;
1988
case PIX_FMT_UYVY422:
1989
*yuv2packed1 = yuv2uyvy422_1_c;
1990
*yuv2packed2 = yuv2uyvy422_2_c;
1991
*yuv2packedX = yuv2uyvy422_X_c;
1993
2205
case PIX_FMT_RGB48LE:
1994
//*yuv2packed1 = yuv2rgb48le_1_c;
1995
//*yuv2packed2 = yuv2rgb48le_2_c;
1996
//*yuv2packedX = yuv2rgb48le_X_c;
2206
*yuv2packed1 = yuv2rgb48le_1_c;
2207
*yuv2packed2 = yuv2rgb48le_2_c;
2208
*yuv2packedX = yuv2rgb48le_X_c;
1998
2210
case PIX_FMT_RGB48BE:
1999
2211
*yuv2packed1 = yuv2rgb48be_1_c;
2000
2212
*yuv2packed2 = yuv2rgb48be_2_c;
2001
2213
*yuv2packedX = yuv2rgb48be_X_c;
2003
2215
case PIX_FMT_BGR48LE:
2004
//*yuv2packed1 = yuv2bgr48le_1_c;
2005
//*yuv2packed2 = yuv2bgr48le_2_c;
2006
//*yuv2packedX = yuv2bgr48le_X_c;
2216
*yuv2packed1 = yuv2bgr48le_1_c;
2217
*yuv2packed2 = yuv2bgr48le_2_c;
2218
*yuv2packedX = yuv2bgr48le_X_c;
2008
2220
case PIX_FMT_BGR48BE:
2009
2221
*yuv2packed1 = yuv2bgr48be_1_c;
2010
2222
*yuv2packed2 = yuv2bgr48be_2_c;
2011
2223
*yuv2packedX = yuv2bgr48be_X_c;
2014
*yuv2packed1 = yuv2packed1_c;
2015
*yuv2packed2 = yuv2packed2_c;
2016
*yuv2packedX = yuv2packedX_c;
2228
*yuv2packed1 = yuv2rgb32_1_c;
2229
*yuv2packed2 = yuv2rgb32_2_c;
2230
*yuv2packedX = yuv2rgb32_X_c;
2232
#if CONFIG_SWSCALE_ALPHA
2234
*yuv2packed1 = yuv2rgba32_1_c;
2235
*yuv2packed2 = yuv2rgba32_2_c;
2236
*yuv2packedX = yuv2rgba32_X_c;
2238
#endif /* CONFIG_SWSCALE_ALPHA */
2240
*yuv2packed1 = yuv2rgbx32_1_c;
2241
*yuv2packed2 = yuv2rgbx32_2_c;
2242
*yuv2packedX = yuv2rgbx32_X_c;
2244
#endif /* !CONFIG_SMALL */
2246
case PIX_FMT_RGB32_1:
2247
case PIX_FMT_BGR32_1:
2249
*yuv2packed1 = yuv2rgb32_1_1_c;
2250
*yuv2packed2 = yuv2rgb32_1_2_c;
2251
*yuv2packedX = yuv2rgb32_1_X_c;
2253
#if CONFIG_SWSCALE_ALPHA
2255
*yuv2packed1 = yuv2rgba32_1_1_c;
2256
*yuv2packed2 = yuv2rgba32_1_2_c;
2257
*yuv2packedX = yuv2rgba32_1_X_c;
2259
#endif /* CONFIG_SWSCALE_ALPHA */
2261
*yuv2packed1 = yuv2rgbx32_1_1_c;
2262
*yuv2packed2 = yuv2rgbx32_1_2_c;
2263
*yuv2packedX = yuv2rgbx32_1_X_c;
2265
#endif /* !CONFIG_SMALL */
2268
*yuv2packed1 = yuv2rgb24_1_c;
2269
*yuv2packed2 = yuv2rgb24_2_c;
2270
*yuv2packedX = yuv2rgb24_X_c;
2273
*yuv2packed1 = yuv2bgr24_1_c;
2274
*yuv2packed2 = yuv2bgr24_2_c;
2275
*yuv2packedX = yuv2bgr24_X_c;
2277
case PIX_FMT_RGB565LE:
2278
case PIX_FMT_RGB565BE:
2279
case PIX_FMT_BGR565LE:
2280
case PIX_FMT_BGR565BE:
2281
*yuv2packed1 = yuv2rgb16_1_c;
2282
*yuv2packed2 = yuv2rgb16_2_c;
2283
*yuv2packedX = yuv2rgb16_X_c;
2285
case PIX_FMT_RGB555LE:
2286
case PIX_FMT_RGB555BE:
2287
case PIX_FMT_BGR555LE:
2288
case PIX_FMT_BGR555BE:
2289
*yuv2packed1 = yuv2rgb15_1_c;
2290
*yuv2packed2 = yuv2rgb15_2_c;
2291
*yuv2packedX = yuv2rgb15_X_c;
2293
case PIX_FMT_RGB444LE:
2294
case PIX_FMT_RGB444BE:
2295
case PIX_FMT_BGR444LE:
2296
case PIX_FMT_BGR444BE:
2297
*yuv2packed1 = yuv2rgb12_1_c;
2298
*yuv2packed2 = yuv2rgb12_2_c;
2299
*yuv2packedX = yuv2rgb12_X_c;
2303
*yuv2packed1 = yuv2rgb8_1_c;
2304
*yuv2packed2 = yuv2rgb8_2_c;
2305
*yuv2packedX = yuv2rgb8_X_c;
2309
*yuv2packed1 = yuv2rgb4_1_c;
2310
*yuv2packed2 = yuv2rgb4_2_c;
2311
*yuv2packedX = yuv2rgb4_X_c;
2313
case PIX_FMT_RGB4_BYTE:
2314
case PIX_FMT_BGR4_BYTE:
2315
*yuv2packed1 = yuv2rgb4b_1_c;
2316
*yuv2packed2 = yuv2rgb4b_2_c;
2317
*yuv2packedX = yuv2rgb4b_X_c;
2321
switch (dstFormat) {
2322
case PIX_FMT_GRAY16BE:
2323
*yuv2packed1 = yuv2gray16BE_1_c;
2324
*yuv2packed2 = yuv2gray16BE_2_c;
2325
*yuv2packedX = yuv2gray16BE_X_c;
2327
case PIX_FMT_GRAY16LE:
2328
*yuv2packed1 = yuv2gray16LE_1_c;
2329
*yuv2packed2 = yuv2gray16LE_2_c;
2330
*yuv2packedX = yuv2gray16LE_X_c;
2332
case PIX_FMT_MONOWHITE:
2333
*yuv2packed1 = yuv2monowhite_1_c;
2334
*yuv2packed2 = yuv2monowhite_2_c;
2335
*yuv2packedX = yuv2monowhite_X_c;
2337
case PIX_FMT_MONOBLACK:
2338
*yuv2packed1 = yuv2monoblack_1_c;
2339
*yuv2packed2 = yuv2monoblack_2_c;
2340
*yuv2packedX = yuv2monoblack_X_c;
2342
case PIX_FMT_YUYV422:
2343
*yuv2packed1 = yuv2yuyv422_1_c;
2344
*yuv2packed2 = yuv2yuyv422_2_c;
2345
*yuv2packedX = yuv2yuyv422_X_c;
2347
case PIX_FMT_UYVY422:
2348
*yuv2packed1 = yuv2uyvy422_1_c;
2349
*yuv2packed2 = yuv2uyvy422_2_c;
2350
*yuv2packedX = yuv2uyvy422_X_c;
2022
2355
#define DEBUG_SWSCALE_BUFFERS 0
2215
2568
const int16_t **chrUSrcPtr= (const int16_t **) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
2216
2569
const int16_t **chrVSrcPtr= (const int16_t **) chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
2217
2570
const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
2572
if (firstLumSrcY < 0 || firstLumSrcY + vLumFilterSize > c->srcH) {
2573
const int16_t **tmpY = (const int16_t **) lumPixBuf + 2 * vLumBufSize;
2574
int neg = -firstLumSrcY, i, end = FFMIN(c->srcH - firstLumSrcY, vLumFilterSize);
2575
for (i = 0; i < neg; i++)
2576
tmpY[i] = lumSrcPtr[neg];
2577
for ( ; i < end; i++)
2578
tmpY[i] = lumSrcPtr[i];
2579
for ( ; i < vLumFilterSize; i++)
2580
tmpY[i] = tmpY[i-1];
2584
const int16_t **tmpA = (const int16_t **) alpPixBuf + 2 * vLumBufSize;
2585
for (i = 0; i < neg; i++)
2586
tmpA[i] = alpSrcPtr[neg];
2587
for ( ; i < end; i++)
2588
tmpA[i] = alpSrcPtr[i];
2589
for ( ; i < vLumFilterSize; i++)
2590
tmpA[i] = tmpA[i - 1];
2594
if (firstChrSrcY < 0 || firstChrSrcY + vChrFilterSize > c->chrSrcH) {
2595
const int16_t **tmpU = (const int16_t **) chrUPixBuf + 2 * vChrBufSize,
2596
**tmpV = (const int16_t **) chrVPixBuf + 2 * vChrBufSize;
2597
int neg = -firstChrSrcY, i, end = FFMIN(c->chrSrcH - firstChrSrcY, vChrFilterSize);
2598
for (i = 0; i < neg; i++) {
2599
tmpU[i] = chrUSrcPtr[neg];
2600
tmpV[i] = chrVSrcPtr[neg];
2602
for ( ; i < end; i++) {
2603
tmpU[i] = chrUSrcPtr[i];
2604
tmpV[i] = chrVSrcPtr[i];
2606
for ( ; i < vChrFilterSize; i++) {
2607
tmpU[i] = tmpU[i - 1];
2608
tmpV[i] = tmpV[i - 1];
2218
2614
if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 like
2219
2615
const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
2220
if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi
2221
if (c->yuv2yuv1 && vLumFilterSize == 1 && vChrFilterSize == 1) { // unscaled YV12
2222
const int16_t *lumBuf = lumSrcPtr[0];
2223
const int16_t *chrUBuf= chrUSrcPtr[0];
2224
const int16_t *chrVBuf= chrVSrcPtr[0];
2225
const int16_t *alpBuf= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? alpSrcPtr[0] : NULL;
2226
yuv2yuv1(c, lumBuf, chrUBuf, chrVBuf, alpBuf, dest,
2227
uDest, vDest, aDest, dstW, chrDstW);
2228
} else { //General YV12
2230
vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
2231
vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr,
2232
chrVSrcPtr, vChrFilterSize,
2233
alpSrcPtr, dest, uDest, vDest, aDest, dstW, chrDstW);
2617
if (vLumFilterSize == 1) {
2618
yuv2plane1(lumSrcPtr[0], dest[0], dstW, c->lumDither8, 0);
2620
yuv2planeX(vLumFilter + dstY * vLumFilterSize, vLumFilterSize,
2621
lumSrcPtr, dest[0], dstW, c->lumDither8, 0);
2624
if (!((dstY&chrSkipMask) || isGray(dstFormat))) {
2626
yuv2nv12cX(c, vChrFilter + chrDstY * vChrFilterSize, vChrFilterSize, chrUSrcPtr, chrVSrcPtr, dest[1], chrDstW);
2627
} else if (vChrFilterSize == 1) {
2628
yuv2plane1(chrUSrcPtr[0], dest[1], chrDstW, c->chrDither8, 0);
2629
yuv2plane1(chrVSrcPtr[0], dest[2], chrDstW, c->chrDither8, 3);
2631
yuv2planeX(vChrFilter + chrDstY * vChrFilterSize, vChrFilterSize,
2632
chrUSrcPtr, dest[1], chrDstW, c->chrDither8, 0);
2633
yuv2planeX(vChrFilter + chrDstY * vChrFilterSize, vChrFilterSize,
2634
chrVSrcPtr, dest[2], chrDstW, c->chrDither8, 3);
2638
if (CONFIG_SWSCALE_ALPHA && alpPixBuf){
2639
if (vLumFilterSize == 1) {
2640
yuv2plane1(alpSrcPtr[0], dest[3], dstW, c->lumDither8, 0);
2642
yuv2planeX(vLumFilter + dstY * vLumFilterSize, vLumFilterSize,
2643
alpSrcPtr, dest[3], dstW, c->lumDither8, 0);
2236
2647
assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
2237
2648
assert(chrUSrcPtr + vChrFilterSize - 1 < chrUPixBuf + vChrBufSize*2);
2238
2649
if (c->yuv2packed1 && vLumFilterSize == 1 && vChrFilterSize == 2) { //unscaled RGB
2239
int chrAlpha= vChrFilter[2*dstY+1];
2240
yuv2packed1(c, *lumSrcPtr, *chrUSrcPtr, *(chrUSrcPtr+1),
2241
*chrVSrcPtr, *(chrVSrcPtr+1),
2242
alpPixBuf ? *alpSrcPtr : NULL,
2243
dest, dstW, chrAlpha, dstFormat, flags, dstY);
2650
int chrAlpha = vChrFilter[2 * dstY + 1];
2651
yuv2packed1(c, *lumSrcPtr, chrUSrcPtr, chrVSrcPtr,
2652
alpPixBuf ? *alpSrcPtr : NULL,
2653
dest[0], dstW, chrAlpha, dstY);
2244
2654
} else if (c->yuv2packed2 && vLumFilterSize == 2 && vChrFilterSize == 2) { //bilinear upscale RGB
2245
int lumAlpha= vLumFilter[2*dstY+1];
2246
int chrAlpha= vChrFilter[2*dstY+1];
2248
lumMmxFilter[3]= vLumFilter[2*dstY ]*0x10001;
2250
chrMmxFilter[3]= vChrFilter[2*chrDstY]*0x10001;
2251
yuv2packed2(c, *lumSrcPtr, *(lumSrcPtr+1), *chrUSrcPtr, *(chrUSrcPtr+1),
2252
*chrVSrcPtr, *(chrVSrcPtr+1),
2253
alpPixBuf ? *alpSrcPtr : NULL, alpPixBuf ? *(alpSrcPtr+1) : NULL,
2254
dest, dstW, lumAlpha, chrAlpha, dstY);
2655
int lumAlpha = vLumFilter[2 * dstY + 1];
2656
int chrAlpha = vChrFilter[2 * dstY + 1];
2658
lumMmxFilter[3] = vLumFilter[2 * dstY ] * 0x10001;
2660
chrMmxFilter[3] = vChrFilter[2 * chrDstY] * 0x10001;
2661
yuv2packed2(c, lumSrcPtr, chrUSrcPtr, chrVSrcPtr,
2662
alpPixBuf ? alpSrcPtr : NULL,
2663
dest[0], dstW, lumAlpha, chrAlpha, dstY);
2255
2664
} else { //general RGB
2257
vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
2258
vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
2259
alpSrcPtr, dest, dstW, dstY);
2665
yuv2packedX(c, vLumFilter + dstY * vLumFilterSize,
2666
lumSrcPtr, vLumFilterSize,
2667
vChrFilter + dstY * vChrFilterSize,
2668
chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
2669
alpSrcPtr, dest[0], dstW, dstY);