18
18
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21
22
#include <inttypes.h>
28
#include "swscale_internal.h"
27
#include "libavutil/avutil.h"
28
#include "libavutil/bswap.h"
29
#include "libavutil/cpu.h"
30
30
#include "libavutil/intreadwrite.h"
31
#include "libavutil/cpu.h"
32
#include "libavutil/avutil.h"
33
31
#include "libavutil/mathematics.h"
34
#include "libavutil/bswap.h"
35
32
#include "libavutil/pixdesc.h"
39
/* Fixed-point RGB->YUV conversion coefficients (ITU-R BT.601, studio range:
 * Y in [16,235] => *219/255, U/V in [16,240] => *224/255), scaled by
 * 2^RGB2YUV_SHIFT and rounded to nearest. */
#define RGB2YUV_SHIFT 15
#define BY ( (int)(0.114*219/255*(1<<RGB2YUV_SHIFT)+0.5))
#define BV (-(int)(0.081*224/255*(1<<RGB2YUV_SHIFT)+0.5))
#define BU ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
#define GY ( (int)(0.587*219/255*(1<<RGB2YUV_SHIFT)+0.5))
#define GV (-(int)(0.419*224/255*(1<<RGB2YUV_SHIFT)+0.5))
#define GU (-(int)(0.331*224/255*(1<<RGB2YUV_SHIFT)+0.5))
#define RY ( (int)(0.299*219/255*(1<<RGB2YUV_SHIFT)+0.5))
#define RV ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
#define RU (-(int)(0.169*224/255*(1<<RGB2YUV_SHIFT)+0.5))
52
/* TODO / untested special converters:
   Special versions: fast Y 1:1 scaling (no interpolation in y direction)

   more intelligent misalignment avoidance for the horizontal scaler
   write special vertical cubic upscale version
   optimize C code (YV12 / minmax)
   add support for packed pixel YUV input & output
   add support for Y8 output
   optimize BGR24 & BGR32
   add BGR4 output support
   write special BGR->BGR scaler
*/
65
/* 2x2 ordered-dither matrix used for the 2-bit green channel of RGB565 output. */
DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_4)[2][8]={
{  1,   3,   1,   3,   1,   3,   1,   3, },
{  2,   0,   2,   0,   2,   0,   2,   0, },
};
70
/* 2x2 ordered-dither matrix used for the 3-bit red/blue channels of RGB565/555. */
DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_8)[2][8]={
{  6,   2,   6,   2,   6,   2,   6,   2, },
{  0,   4,   0,   4,   0,   4,   0,   4, },
};
75
/* 4x4 ordered-dither matrix (16 levels) for 4-bit-per-channel RGB444/BGR444 output. */
DECLARE_ALIGNED(8, const uint8_t, dither_4x4_16)[4][8]={
{  8,   4,  11,   7,   8,   4,  11,   7, },
{  2,  14,   1,  13,   2,  14,   1,  13, },
{ 10,   6,   9,   5,  10,   6,   9,   5, },
{  0,  12,   3,  15,   0,  12,   3,  15, },
};
82
/* 8x8 ordered-dither matrix (32 levels) for 3-bit red/green channels of RGB8/BGR8. */
DECLARE_ALIGNED(8, const uint8_t, dither_8x8_32)[8][8]={
{ 17,   9,  23,  15,  16,   8,  22,  14, },
{  5,  29,   3,  27,   4,  28,   2,  26, },
{ 21,  13,  19,  11,  20,  12,  18,  10, },
{  0,  24,   6,  30,   1,  25,   7,  31, },
{ 16,   8,  22,  14,  17,   9,  23,  15, },
{  4,  28,   2,  26,   5,  29,   3,  27, },
{ 20,  12,  18,  10,  21,  13,  19,  11, },
{  1,  25,   7,  31,   0,  24,   6,  30, },
};
93
/* 8x8 ordered-dither matrix (73 levels) used for 2-bit channels (RGB8 blue, RGB4 green). */
DECLARE_ALIGNED(8, const uint8_t, dither_8x8_73)[8][8]={
{  0,  55,  14,  68,   3,  58,  17,  72, },
{ 37,  18,  50,  32,  40,  22,  54,  35, },
{  9,  64,   5,  59,  13,  67,   8,  63, },
{ 46,  27,  41,  23,  49,  31,  44,  26, },
{  2,  57,  16,  71,   1,  56,  15,  70, },
{ 39,  21,  52,  34,  38,  19,  51,  33, },
{ 11,  66,   7,  62,  10,  65,   6,  60, },
{ 48,  30,  43,  25,  47,  29,  42,  24, },
};
105
/* 8x8 ordered-dither matrix (220 levels) used for 1-bit channels (RGB4 red/blue,
 * monochrome output). This is the active table; gamma-corrected alternatives
 * below are compiled out. */
DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
{117,  62, 158, 103, 113,  58, 155, 100, },
{ 34, 199,  21, 186,  31, 196,  17, 182, },
{144,  89, 131,  76, 141,  86, 127,  72, },
{  0, 165,  41, 206,  10, 175,  52, 217, },
{110,  55, 151,  96, 120,  65, 162, 107, },
{ 28, 193,  14, 179,  38, 203,  24, 189, },
{138,  83, 124,  69, 148,  93, 134,  79, },
{  7, 172,  48, 213,   3, 168,  45, 210, },
};
116
/* Alternative dither table; redefines dither_8x8_220 so it must stay compiled
 * out (enabling it without removing the table above is a redefinition error). */
#if 0
// tries to correct a gamma of 1.5
DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
{  0, 143,  18, 200,   2, 156,  25, 215, },
{ 78,  28, 125,  64,  89,  36, 138,  74, },
{ 10, 180,   3, 161,  16, 195,   8, 175, },
{109,  51,  93,  38, 121,  60, 105,  47, },
{  1, 152,  23, 210,   0, 147,  20, 205, },
{ 85,  33, 134,  71,  81,  30, 130,  67, },
{ 14, 190,   6, 171,  12, 185,   5, 166, },
{117,  57, 101,  44, 113,  54,  97,  41, },
};
#endif
128
/* Alternative dither table; redefines dither_8x8_220 so it must stay compiled out. */
#if 0
// tries to correct a gamma of 2.0
DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
{  0, 124,   8, 193,   0, 140,  12, 213, },
{ 55,  14, 104,  42,  66,  19, 119,  52, },
{  3, 168,   1, 145,   6, 187,   3, 162, },
{ 86,  31,  70,  21,  99,  39,  82,  28, },
{  0, 134,  11, 206,   0, 129,   9, 200, },
{ 62,  17, 114,  48,  58,  16, 109,  45, },
{  5, 181,   2, 157,   4, 175,   1, 151, },
{ 95,  36,  78,  26,  90,  34,  74,  24, },
};
#endif
140
/* Alternative dither table; redefines dither_8x8_220 so it must stay compiled out. */
#if 0
// tries to correct a gamma of 2.5
DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
{  0, 107,   3, 187,   0, 125,   6, 212, },
{ 39,   7,  86,  28,  49,  11, 102,  36, },
{  1, 158,   0, 131,   3, 180,   1, 151, },
{ 68,  19,  52,  12,  81,  25,  64,  17, },
{  0, 119,   5, 203,   0, 113,   4, 195, },
{ 45,   9,  96,  33,  42,   8,  91,  30, },
{  2, 172,   1, 144,   2, 165,   0, 137, },
{ 77,  23,  60,  15,  72,  21,  56,  14, },
};
#endif
35
#include "swscale_internal.h"
152
38
/* 8x8 ordered-dither matrix (128 levels), used when reducing bit depth of
 * planar output (e.g. 8-bit luma/chroma rounding). */
DECLARE_ALIGNED(8, const uint8_t, dither_8x8_128)[8][8] = {
{  36, 68, 60, 92, 34, 66, 58, 90,},
{ 100,  4,124, 28, 98,  2,122, 26,},
{  52, 84, 44, 76, 50, 82, 42, 74,},
{ 116, 20,108, 12,114, 18,106, 10,},
{  32, 64, 56, 88, 38, 70, 62, 94,},
{  96,  0,120, 24,102,  6,126, 30,},
{  48, 80, 40, 72, 54, 86, 46, 78,},
{ 112, 16,104,  8,118, 22,110, 14,},
};
162
/* Constant 64-filled vector, used as a SIMD rounding/bias constant. */
DECLARE_ALIGNED(8, const uint8_t, ff_sws_pb_64)[8] =
{  64, 64, 64, 64, 64, 64, 64, 64 };
165
/* Store one 16-bit sample with the requested endianness, after shifting by
 * 'shift' (a local in the enclosing function), clipping to the signed or
 * unsigned 16-bit range and adding 'bias'. */
#define output_pixel(pos, val, bias, signedness) \
    if (big_endian) { \
        AV_WB16(pos, bias + av_clip_ ## signedness ## 16(val >> shift)); \
    } else { \
        AV_WL16(pos, bias + av_clip_ ## signedness ## 16(val >> shift)); \
    }
172
static av_always_inline void
173
yuv2plane1_16_c_template(const int32_t *src, uint16_t *dest, int dstW,
174
int big_endian, int output_bits)
177
int shift = 19 - output_bits;
179
for (i = 0; i < dstW; i++) {
180
int val = src[i] + (1 << (shift - 1));
181
output_pixel(&dest[i], val, 0, uint);
185
static av_always_inline void
186
yuv2planeX_16_c_template(const int16_t *filter, int filterSize,
187
const int32_t **src, uint16_t *dest, int dstW,
188
int big_endian, int output_bits)
191
int shift = 15 + 16 - output_bits;
193
for (i = 0; i < dstW; i++) {
194
int val = 1 << (30-output_bits);
197
/* range of val is [0,0x7FFFFFFF], so 31 bits, but with lanczos/spline
198
* filters (or anything with negative coeffs, the range can be slightly
199
* wider in both directions. To account for this overflow, we subtract
200
* a constant so it always fits in the signed range (assuming a
201
* reasonable filterSize), and re-add that at the end. */
203
for (j = 0; j < filterSize; j++)
204
val += src[j][i] * filter[j];
206
output_pixel(&dest[i], val, 0x8000, int);
212
#undef output_pixel

/* Store one 9/10-bit sample as 16 bits with the requested endianness,
 * clipping to [0, 2^output_bits - 1]. */
#define output_pixel(pos, val) \
    if (big_endian) { \
        AV_WB16(pos, av_clip_uintp2(val >> shift, output_bits)); \
    } else { \
        AV_WL16(pos, av_clip_uintp2(val >> shift, output_bits)); \
    }
219
static av_always_inline void
220
yuv2plane1_10_c_template(const int16_t *src, uint16_t *dest, int dstW,
221
int big_endian, int output_bits)
224
int shift = 15 - output_bits;
226
for (i = 0; i < dstW; i++) {
227
int val = src[i] + (1 << (shift - 1));
228
output_pixel(&dest[i], val);
232
static av_always_inline void
233
yuv2planeX_10_c_template(const int16_t *filter, int filterSize,
234
const int16_t **src, uint16_t *dest, int dstW,
235
int big_endian, int output_bits)
238
int shift = 11 + 16 - output_bits;
240
for (i = 0; i < dstW; i++) {
241
int val = 1 << (26-output_bits);
244
for (j = 0; j < filterSize; j++)
245
val += src[j][i] * filter[j];
247
output_pixel(&dest[i], val);
253
/* Instantiate the plane1/planeX output functions for one bit depth and
 * endianness pair by delegating to the 10/16-bit templates above. */
#define yuv2NBPS(bits, BE_LE, is_be, template_size, typeX_t) \
static void yuv2plane1_ ## bits ## BE_LE ## _c(const int16_t *src, \
                              uint8_t *dest, int dstW, \
                              const uint8_t *dither, int offset)\
{ \
    yuv2plane1_ ## template_size ## _c_template((const typeX_t *) src, \
                         (uint16_t *) dest, dstW, is_be, bits); \
}\
static void yuv2planeX_ ## bits ## BE_LE ## _c(const int16_t *filter, int filterSize, \
                              const int16_t **src, uint8_t *dest, int dstW, \
                              const uint8_t *dither, int offset)\
{ \
    yuv2planeX_## template_size ## _c_template(filter, \
                         filterSize, (const typeX_t **) src, \
                         (uint16_t *) dest, dstW, is_be, bits); \
}
269
/* 9/10-bit depths use the 16-bit intermediate template; 16-bit uses 32-bit. */
yuv2NBPS( 9, BE, 1, 10, int16_t)
yuv2NBPS( 9, LE, 0, 10, int16_t)
yuv2NBPS(10, BE, 1, 10, int16_t)
yuv2NBPS(10, LE, 0, 10, int16_t)
yuv2NBPS(16, BE, 1, 16, int32_t)
yuv2NBPS(16, LE, 0, 16, int32_t)
276
/**
 * Write one 8-bit output plane by applying a vertical filter over several
 * 16-bit intermediate lines, with ordered dithering.
 */
static void yuv2planeX_8_c(const int16_t *filter, int filterSize,
                           const int16_t **src, uint8_t *dest, int dstW,
                           const uint8_t *dither, int offset)
{
    int i;
    for (i=0; i<dstW; i++) {
        int val = dither[(i + offset) & 7] << 12; // dither in 12-bit headroom
        int j;
        for (j=0; j<filterSize; j++)
            val += src[j][i] * filter[j];

        dest[i]= av_clip_uint8(val>>19);
    }
}
291
/**
 * Write one 8-bit output plane from a single 16-bit intermediate line
 * (unscaled vertical path), with ordered dithering.
 */
static void yuv2plane1_8_c(const int16_t *src, uint8_t *dest, int dstW,
                           const uint8_t *dither, int offset)
{
    int i;
    for (i=0; i<dstW; i++) {
        int val = (src[i] + dither[(i + offset) & 7]) >> 7;
        dest[i]= av_clip_uint8(val);
    }
}
301
static void yuv2nv12cX_c(SwsContext *c, const int16_t *chrFilter, int chrFilterSize,
302
const int16_t **chrUSrc, const int16_t **chrVSrc,
303
uint8_t *dest, int chrDstW)
305
enum PixelFormat dstFormat = c->dstFormat;
306
const uint8_t *chrDither = c->chrDither8;
309
if (dstFormat == PIX_FMT_NV12)
310
for (i=0; i<chrDstW; i++) {
311
int u = chrDither[i & 7] << 12;
312
int v = chrDither[(i + 3) & 7] << 12;
314
for (j=0; j<chrFilterSize; j++) {
315
u += chrUSrc[j][i] * chrFilter[j];
316
v += chrVSrc[j][i] * chrFilter[j];
319
dest[2*i]= av_clip_uint8(u>>19);
320
dest[2*i+1]= av_clip_uint8(v>>19);
323
for (i=0; i<chrDstW; i++) {
324
int u = chrDither[i & 7] << 12;
325
int v = chrDither[(i + 3) & 7] << 12;
327
for (j=0; j<chrFilterSize; j++) {
328
u += chrUSrc[j][i] * chrFilter[j];
329
v += chrVSrc[j][i] * chrFilter[j];
332
dest[2*i]= av_clip_uint8(v>>19);
333
dest[2*i+1]= av_clip_uint8(u>>19);
337
/* Store one 16-bit gray sample with the endianness implied by 'target'. */
#define output_pixel(pos, val) \
    if (target == PIX_FMT_GRAY16BE) { \
        AV_WB16(pos, val); \
    } else { \
        AV_WL16(pos, val); \
    }
344
static av_always_inline void
345
yuv2gray16_X_c_template(SwsContext *c, const int16_t *lumFilter,
346
const int32_t **lumSrc, int lumFilterSize,
347
const int16_t *chrFilter, const int32_t **chrUSrc,
348
const int32_t **chrVSrc, int chrFilterSize,
349
const int32_t **alpSrc, uint16_t *dest, int dstW,
350
int y, enum PixelFormat target)
354
for (i = 0; i < (dstW >> 1); i++) {
356
int Y1 = (1 << 14) - 0x40000000;
357
int Y2 = (1 << 14) - 0x40000000;
359
for (j = 0; j < lumFilterSize; j++) {
360
Y1 += lumSrc[j][i * 2] * lumFilter[j];
361
Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
365
Y1 = av_clip_int16(Y1);
366
Y2 = av_clip_int16(Y2);
367
output_pixel(&dest[i * 2 + 0], 0x8000 + Y1);
368
output_pixel(&dest[i * 2 + 1], 0x8000 + Y2);
372
static av_always_inline void
373
yuv2gray16_2_c_template(SwsContext *c, const int32_t *buf[2],
374
const int32_t *ubuf[2], const int32_t *vbuf[2],
375
const int32_t *abuf[2], uint16_t *dest, int dstW,
376
int yalpha, int uvalpha, int y,
377
enum PixelFormat target)
379
int yalpha1 = 4095 - yalpha;
381
const int32_t *buf0 = buf[0], *buf1 = buf[1];
383
for (i = 0; i < (dstW >> 1); i++) {
384
int Y1 = (buf0[i * 2 ] * yalpha1 + buf1[i * 2 ] * yalpha) >> 15;
385
int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 15;
387
output_pixel(&dest[i * 2 + 0], Y1);
388
output_pixel(&dest[i * 2 + 1], Y2);
392
static av_always_inline void
393
yuv2gray16_1_c_template(SwsContext *c, const int32_t *buf0,
394
const int32_t *ubuf[2], const int32_t *vbuf[2],
395
const int32_t *abuf0, uint16_t *dest, int dstW,
396
int uvalpha, int y, enum PixelFormat target)
400
for (i = 0; i < (dstW >> 1); i++) {
401
int Y1 = buf0[i * 2 ] << 1;
402
int Y2 = buf0[i * 2 + 1] << 1;
404
output_pixel(&dest[i * 2 + 0], Y1);
405
output_pixel(&dest[i * 2 + 1], Y2);
411
/* Generate the three public entry points (X/2/1 vertical-scaler variants) for
 * a >8-bit packed output format. The int16_t** prototypes are casted to the
 * int32_t** that the 16-bit templates actually consume. */
#define YUV2PACKED16WRAPPER(name, base, ext, fmt) \
static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
                        const int16_t **_lumSrc, int lumFilterSize, \
                        const int16_t *chrFilter, const int16_t **_chrUSrc, \
                        const int16_t **_chrVSrc, int chrFilterSize, \
                        const int16_t **_alpSrc, uint8_t *_dest, int dstW, \
                        int y) \
{ \
    const int32_t **lumSrc  = (const int32_t **) _lumSrc, \
                  **chrUSrc = (const int32_t **) _chrUSrc, \
                  **chrVSrc = (const int32_t **) _chrVSrc, \
                  **alpSrc  = (const int32_t **) _alpSrc; \
    uint16_t *dest = (uint16_t *) _dest; \
    name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
                          chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
                          alpSrc, dest, dstW, y, fmt); \
} \
 \
static void name ## ext ## _2_c(SwsContext *c, const int16_t *_buf[2], \
                        const int16_t *_ubuf[2], const int16_t *_vbuf[2], \
                        const int16_t *_abuf[2], uint8_t *_dest, int dstW, \
                        int yalpha, int uvalpha, int y) \
{ \
    const int32_t **buf  = (const int32_t **) _buf, \
                  **ubuf = (const int32_t **) _ubuf, \
                  **vbuf = (const int32_t **) _vbuf, \
                  **abuf = (const int32_t **) _abuf; \
    uint16_t *dest = (uint16_t *) _dest; \
    name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
                          dest, dstW, yalpha, uvalpha, y, fmt); \
} \
 \
static void name ## ext ## _1_c(SwsContext *c, const int16_t *_buf0, \
                        const int16_t *_ubuf[2], const int16_t *_vbuf[2], \
                        const int16_t *_abuf0, uint8_t *_dest, int dstW, \
                        int uvalpha, int y) \
{ \
    const int32_t *buf0  = (const int32_t *)  _buf0, \
                 **ubuf  = (const int32_t **) _ubuf, \
                 **vbuf  = (const int32_t **) _vbuf, \
                  *abuf0 = (const int32_t *)  _abuf0; \
    uint16_t *dest = (uint16_t *) _dest; \
    name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \
                                  dstW, uvalpha, y, fmt); \
}
457
YUV2PACKED16WRAPPER(yuv2gray16,, LE, PIX_FMT_GRAY16LE)
YUV2PACKED16WRAPPER(yuv2gray16,, BE, PIX_FMT_GRAY16BE)
460
/* Store one packed byte of 8 mono pixels; MONOWHITE is the bit-inverted form. */
#define output_pixel(pos, acc) \
    if (target == PIX_FMT_MONOBLACK) { \
        pos = acc; \
    } else { \
        pos = ~acc; \
    }
467
static av_always_inline void
468
yuv2mono_X_c_template(SwsContext *c, const int16_t *lumFilter,
469
const int16_t **lumSrc, int lumFilterSize,
470
const int16_t *chrFilter, const int16_t **chrUSrc,
471
const int16_t **chrVSrc, int chrFilterSize,
472
const int16_t **alpSrc, uint8_t *dest, int dstW,
473
int y, enum PixelFormat target)
475
const uint8_t * const d128=dither_8x8_220[y&7];
476
uint8_t *g = c->table_gU[128] + c->table_gV[128];
480
for (i = 0; i < dstW - 1; i += 2) {
485
for (j = 0; j < lumFilterSize; j++) {
486
Y1 += lumSrc[j][i] * lumFilter[j];
487
Y2 += lumSrc[j][i+1] * lumFilter[j];
491
if ((Y1 | Y2) & 0x100) {
492
Y1 = av_clip_uint8(Y1);
493
Y2 = av_clip_uint8(Y2);
495
acc += acc + g[Y1 + d128[(i + 0) & 7]];
496
acc += acc + g[Y2 + d128[(i + 1) & 7]];
498
output_pixel(*dest++, acc);
503
static av_always_inline void
504
yuv2mono_2_c_template(SwsContext *c, const int16_t *buf[2],
505
const int16_t *ubuf[2], const int16_t *vbuf[2],
506
const int16_t *abuf[2], uint8_t *dest, int dstW,
507
int yalpha, int uvalpha, int y,
508
enum PixelFormat target)
510
const int16_t *buf0 = buf[0], *buf1 = buf[1];
511
const uint8_t * const d128 = dither_8x8_220[y & 7];
512
uint8_t *g = c->table_gU[128] + c->table_gV[128];
513
int yalpha1 = 4095 - yalpha;
516
for (i = 0; i < dstW - 7; i += 8) {
517
int acc = g[((buf0[i ] * yalpha1 + buf1[i ] * yalpha) >> 19) + d128[0]];
518
acc += acc + g[((buf0[i + 1] * yalpha1 + buf1[i + 1] * yalpha) >> 19) + d128[1]];
519
acc += acc + g[((buf0[i + 2] * yalpha1 + buf1[i + 2] * yalpha) >> 19) + d128[2]];
520
acc += acc + g[((buf0[i + 3] * yalpha1 + buf1[i + 3] * yalpha) >> 19) + d128[3]];
521
acc += acc + g[((buf0[i + 4] * yalpha1 + buf1[i + 4] * yalpha) >> 19) + d128[4]];
522
acc += acc + g[((buf0[i + 5] * yalpha1 + buf1[i + 5] * yalpha) >> 19) + d128[5]];
523
acc += acc + g[((buf0[i + 6] * yalpha1 + buf1[i + 6] * yalpha) >> 19) + d128[6]];
524
acc += acc + g[((buf0[i + 7] * yalpha1 + buf1[i + 7] * yalpha) >> 19) + d128[7]];
525
output_pixel(*dest++, acc);
529
static av_always_inline void
530
yuv2mono_1_c_template(SwsContext *c, const int16_t *buf0,
531
const int16_t *ubuf[2], const int16_t *vbuf[2],
532
const int16_t *abuf0, uint8_t *dest, int dstW,
533
int uvalpha, int y, enum PixelFormat target)
535
const uint8_t * const d128 = dither_8x8_220[y & 7];
536
uint8_t *g = c->table_gU[128] + c->table_gV[128];
539
for (i = 0; i < dstW - 7; i += 8) {
540
int acc = g[(buf0[i ] >> 7) + d128[0]];
541
acc += acc + g[(buf0[i + 1] >> 7) + d128[1]];
542
acc += acc + g[(buf0[i + 2] >> 7) + d128[2]];
543
acc += acc + g[(buf0[i + 3] >> 7) + d128[3]];
544
acc += acc + g[(buf0[i + 4] >> 7) + d128[4]];
545
acc += acc + g[(buf0[i + 5] >> 7) + d128[5]];
546
acc += acc + g[(buf0[i + 6] >> 7) + d128[6]];
547
acc += acc + g[(buf0[i + 7] >> 7) + d128[7]];
548
output_pixel(*dest++, acc);
554
/* Generate the three public entry points (X/2/1 vertical-scaler variants) for
 * an 8-bit packed output format by forwarding to the matching template with
 * the pixel format baked in. */
#define YUV2PACKEDWRAPPER(name, base, ext, fmt) \
static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
                                const int16_t **lumSrc, int lumFilterSize, \
                                const int16_t *chrFilter, const int16_t **chrUSrc, \
                                const int16_t **chrVSrc, int chrFilterSize, \
                                const int16_t **alpSrc, uint8_t *dest, int dstW, \
                                int y) \
{ \
    name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
                                  chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
                                  alpSrc, dest, dstW, y, fmt); \
} \
 \
static void name ## ext ## _2_c(SwsContext *c, const int16_t *buf[2], \
                                const int16_t *ubuf[2], const int16_t *vbuf[2], \
                                const int16_t *abuf[2], uint8_t *dest, int dstW, \
                                int yalpha, int uvalpha, int y) \
{ \
    name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
                                  dest, dstW, yalpha, uvalpha, y, fmt); \
} \
 \
static void name ## ext ## _1_c(SwsContext *c, const int16_t *buf0, \
                                const int16_t *ubuf[2], const int16_t *vbuf[2], \
                                const int16_t *abuf0, uint8_t *dest, int dstW, \
                                int uvalpha, int y) \
{ \
    name ## base ## _1_c_template(c, buf0, ubuf, vbuf, \
                                  abuf0, dest, dstW, uvalpha, \
                                  y, fmt); \
}
586
YUV2PACKEDWRAPPER(yuv2mono,, white, PIX_FMT_MONOWHITE)
YUV2PACKEDWRAPPER(yuv2mono,, black, PIX_FMT_MONOBLACK)
589
/* Store one macropixel (2 luma + shared chroma) in YUYV or UYVY byte order. */
#define output_pixels(pos, Y1, U, Y2, V) \
    if (target == PIX_FMT_YUYV422) { \
        dest[pos + 0] = Y1; \
        dest[pos + 1] = U;  \
        dest[pos + 2] = Y2; \
        dest[pos + 3] = V;  \
    } else { \
        dest[pos + 0] = U;  \
        dest[pos + 1] = Y1; \
        dest[pos + 2] = V;  \
        dest[pos + 3] = Y2; \
    }
602
static av_always_inline void
603
yuv2422_X_c_template(SwsContext *c, const int16_t *lumFilter,
604
const int16_t **lumSrc, int lumFilterSize,
605
const int16_t *chrFilter, const int16_t **chrUSrc,
606
const int16_t **chrVSrc, int chrFilterSize,
607
const int16_t **alpSrc, uint8_t *dest, int dstW,
608
int y, enum PixelFormat target)
612
for (i = 0; i < (dstW >> 1); i++) {
619
for (j = 0; j < lumFilterSize; j++) {
620
Y1 += lumSrc[j][i * 2] * lumFilter[j];
621
Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
623
for (j = 0; j < chrFilterSize; j++) {
624
U += chrUSrc[j][i] * chrFilter[j];
625
V += chrVSrc[j][i] * chrFilter[j];
631
if ((Y1 | Y2 | U | V) & 0x100) {
632
Y1 = av_clip_uint8(Y1);
633
Y2 = av_clip_uint8(Y2);
634
U = av_clip_uint8(U);
635
V = av_clip_uint8(V);
637
output_pixels(4*i, Y1, U, Y2, V);
641
static av_always_inline void
642
yuv2422_2_c_template(SwsContext *c, const int16_t *buf[2],
643
const int16_t *ubuf[2], const int16_t *vbuf[2],
644
const int16_t *abuf[2], uint8_t *dest, int dstW,
645
int yalpha, int uvalpha, int y,
646
enum PixelFormat target)
648
const int16_t *buf0 = buf[0], *buf1 = buf[1],
649
*ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
650
*vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
651
int yalpha1 = 4095 - yalpha;
652
int uvalpha1 = 4095 - uvalpha;
655
for (i = 0; i < (dstW >> 1); i++) {
656
int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 19;
657
int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 19;
658
int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha) >> 19;
659
int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha) >> 19;
661
output_pixels(i * 4, Y1, U, Y2, V);
665
static av_always_inline void
666
yuv2422_1_c_template(SwsContext *c, const int16_t *buf0,
667
const int16_t *ubuf[2], const int16_t *vbuf[2],
668
const int16_t *abuf0, uint8_t *dest, int dstW,
669
int uvalpha, int y, enum PixelFormat target)
671
const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
672
*vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
675
if (uvalpha < 2048) {
676
for (i = 0; i < (dstW >> 1); i++) {
677
int Y1 = buf0[i * 2] >> 7;
678
int Y2 = buf0[i * 2 + 1] >> 7;
679
int U = ubuf1[i] >> 7;
680
int V = vbuf1[i] >> 7;
682
output_pixels(i * 4, Y1, U, Y2, V);
685
for (i = 0; i < (dstW >> 1); i++) {
686
int Y1 = buf0[i * 2] >> 7;
687
int Y2 = buf0[i * 2 + 1] >> 7;
688
int U = (ubuf0[i] + ubuf1[i]) >> 8;
689
int V = (vbuf0[i] + vbuf1[i]) >> 8;
691
output_pixels(i * 4, Y1, U, Y2, V);
698
YUV2PACKEDWRAPPER(yuv2, 422, yuyv422, PIX_FMT_YUYV422)
YUV2PACKEDWRAPPER(yuv2, 422, uyvy422, PIX_FMT_UYVY422)
701
/* Component-order helpers: RGB48 stores R first, BGR48 stores B first. */
#define R_B ((target == PIX_FMT_RGB48LE || target == PIX_FMT_RGB48BE) ? R : B)
#define B_R ((target == PIX_FMT_RGB48LE || target == PIX_FMT_RGB48BE) ? B : R)

/* Store one 16-bit component with the endianness implied by 'target'. */
#define output_pixel(pos, val) \
    if (isBE(target)) { \
        AV_WB16(pos, val); \
    } else { \
        AV_WL16(pos, val); \
    }
710
static av_always_inline void
711
yuv2rgb48_X_c_template(SwsContext *c, const int16_t *lumFilter,
712
const int32_t **lumSrc, int lumFilterSize,
713
const int16_t *chrFilter, const int32_t **chrUSrc,
714
const int32_t **chrVSrc, int chrFilterSize,
715
const int32_t **alpSrc, uint16_t *dest, int dstW,
716
int y, enum PixelFormat target)
720
for (i = 0; i < (dstW >> 1); i++) {
722
int Y1 = -0x40000000;
723
int Y2 = -0x40000000;
724
int U = -128 << 23; // 19
728
for (j = 0; j < lumFilterSize; j++) {
729
Y1 += lumSrc[j][i * 2] * lumFilter[j];
730
Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
732
for (j = 0; j < chrFilterSize; j++) {
733
U += chrUSrc[j][i] * chrFilter[j];
734
V += chrVSrc[j][i] * chrFilter[j];
737
// 8bit: 12+15=27; 16-bit: 12+19=31
745
// 8bit: 27 -> 17bit, 16bit: 31 - 14 = 17bit
746
Y1 -= c->yuv2rgb_y_offset;
747
Y2 -= c->yuv2rgb_y_offset;
748
Y1 *= c->yuv2rgb_y_coeff;
749
Y2 *= c->yuv2rgb_y_coeff;
752
// 8bit: 17 + 13bit = 30bit, 16bit: 17 + 13bit = 30bit
754
R = V * c->yuv2rgb_v2r_coeff;
755
G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
756
B = U * c->yuv2rgb_u2b_coeff;
758
// 8bit: 30 - 22 = 8bit, 16bit: 30bit - 14 = 16bit
759
output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
760
output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
761
output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
762
output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
763
output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
764
output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
769
static av_always_inline void
770
yuv2rgb48_2_c_template(SwsContext *c, const int32_t *buf[2],
771
const int32_t *ubuf[2], const int32_t *vbuf[2],
772
const int32_t *abuf[2], uint16_t *dest, int dstW,
773
int yalpha, int uvalpha, int y,
774
enum PixelFormat target)
776
const int32_t *buf0 = buf[0], *buf1 = buf[1],
777
*ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
778
*vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
779
int yalpha1 = 4095 - yalpha;
780
int uvalpha1 = 4095 - uvalpha;
783
for (i = 0; i < (dstW >> 1); i++) {
784
int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 14;
785
int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 14;
786
int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha + (-128 << 23)) >> 14;
787
int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha + (-128 << 23)) >> 14;
790
Y1 -= c->yuv2rgb_y_offset;
791
Y2 -= c->yuv2rgb_y_offset;
792
Y1 *= c->yuv2rgb_y_coeff;
793
Y2 *= c->yuv2rgb_y_coeff;
797
R = V * c->yuv2rgb_v2r_coeff;
798
G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
799
B = U * c->yuv2rgb_u2b_coeff;
801
output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
802
output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
803
output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
804
output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
805
output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
806
output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
811
static av_always_inline void
812
yuv2rgb48_1_c_template(SwsContext *c, const int32_t *buf0,
813
const int32_t *ubuf[2], const int32_t *vbuf[2],
814
const int32_t *abuf0, uint16_t *dest, int dstW,
815
int uvalpha, int y, enum PixelFormat target)
817
const int32_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
818
*vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
821
if (uvalpha < 2048) {
822
for (i = 0; i < (dstW >> 1); i++) {
823
int Y1 = (buf0[i * 2] ) >> 2;
824
int Y2 = (buf0[i * 2 + 1]) >> 2;
825
int U = (ubuf0[i] + (-128 << 11)) >> 2;
826
int V = (vbuf0[i] + (-128 << 11)) >> 2;
829
Y1 -= c->yuv2rgb_y_offset;
830
Y2 -= c->yuv2rgb_y_offset;
831
Y1 *= c->yuv2rgb_y_coeff;
832
Y2 *= c->yuv2rgb_y_coeff;
836
R = V * c->yuv2rgb_v2r_coeff;
837
G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
838
B = U * c->yuv2rgb_u2b_coeff;
840
output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
841
output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
842
output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
843
output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
844
output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
845
output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
849
for (i = 0; i < (dstW >> 1); i++) {
850
int Y1 = (buf0[i * 2] ) >> 2;
851
int Y2 = (buf0[i * 2 + 1]) >> 2;
852
int U = (ubuf0[i] + ubuf1[i] + (-128 << 11)) >> 3;
853
int V = (vbuf0[i] + vbuf1[i] + (-128 << 11)) >> 3;
856
Y1 -= c->yuv2rgb_y_offset;
857
Y2 -= c->yuv2rgb_y_offset;
858
Y1 *= c->yuv2rgb_y_coeff;
859
Y2 *= c->yuv2rgb_y_coeff;
863
R = V * c->yuv2rgb_v2r_coeff;
864
G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
865
B = U * c->yuv2rgb_u2b_coeff;
867
output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
868
output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
869
output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
870
output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
871
output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
872
output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
882
YUV2PACKED16WRAPPER(yuv2, rgb48, rgb48be, PIX_FMT_RGB48BE)
883
YUV2PACKED16WRAPPER(yuv2, rgb48, rgb48le, PIX_FMT_RGB48LE)
884
YUV2PACKED16WRAPPER(yuv2, rgb48, bgr48be, PIX_FMT_BGR48BE)
885
YUV2PACKED16WRAPPER(yuv2, rgb48, bgr48le, PIX_FMT_BGR48LE)
888
* Write out 2 RGB pixels in the target pixel format. This function takes a
889
* R/G/B LUT as generated by ff_yuv2rgb_c_init_tables(), which takes care of
890
* things like endianness conversion and shifting. The caller takes care of
891
* setting the correct offset in these tables from the chroma (U/V) values.
892
* This function then uses the luminance (Y1/Y2) values to write out the
893
* correct RGB values into the destination buffer.
895
static av_always_inline void
896
yuv2rgb_write(uint8_t *_dest, int i, unsigned Y1, unsigned Y2,
897
unsigned A1, unsigned A2,
898
const void *_r, const void *_g, const void *_b, int y,
899
enum PixelFormat target, int hasAlpha)
901
if (target == PIX_FMT_ARGB || target == PIX_FMT_RGBA ||
902
target == PIX_FMT_ABGR || target == PIX_FMT_BGRA) {
903
uint32_t *dest = (uint32_t *) _dest;
904
const uint32_t *r = (const uint32_t *) _r;
905
const uint32_t *g = (const uint32_t *) _g;
906
const uint32_t *b = (const uint32_t *) _b;
909
int sh = hasAlpha ? ((target == PIX_FMT_RGB32_1 || target == PIX_FMT_BGR32_1) ? 0 : 24) : 0;
911
dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1] + (hasAlpha ? A1 << sh : 0);
912
dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2] + (hasAlpha ? A2 << sh : 0);
915
int sh = (target == PIX_FMT_RGB32_1 || target == PIX_FMT_BGR32_1) ? 0 : 24;
917
dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1] + (A1 << sh);
918
dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2] + (A2 << sh);
920
dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1];
921
dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2];
924
} else if (target == PIX_FMT_RGB24 || target == PIX_FMT_BGR24) {
925
uint8_t *dest = (uint8_t *) _dest;
926
const uint8_t *r = (const uint8_t *) _r;
927
const uint8_t *g = (const uint8_t *) _g;
928
const uint8_t *b = (const uint8_t *) _b;
930
#define r_b ((target == PIX_FMT_RGB24) ? r : b)
931
#define b_r ((target == PIX_FMT_RGB24) ? b : r)
932
dest[i * 6 + 0] = r_b[Y1];
933
dest[i * 6 + 1] = g[Y1];
934
dest[i * 6 + 2] = b_r[Y1];
935
dest[i * 6 + 3] = r_b[Y2];
936
dest[i * 6 + 4] = g[Y2];
937
dest[i * 6 + 5] = b_r[Y2];
940
} else if (target == PIX_FMT_RGB565 || target == PIX_FMT_BGR565 ||
941
target == PIX_FMT_RGB555 || target == PIX_FMT_BGR555 ||
942
target == PIX_FMT_RGB444 || target == PIX_FMT_BGR444) {
943
uint16_t *dest = (uint16_t *) _dest;
944
const uint16_t *r = (const uint16_t *) _r;
945
const uint16_t *g = (const uint16_t *) _g;
946
const uint16_t *b = (const uint16_t *) _b;
947
int dr1, dg1, db1, dr2, dg2, db2;
949
if (target == PIX_FMT_RGB565 || target == PIX_FMT_BGR565) {
950
dr1 = dither_2x2_8[ y & 1 ][0];
951
dg1 = dither_2x2_4[ y & 1 ][0];
952
db1 = dither_2x2_8[(y & 1) ^ 1][0];
953
dr2 = dither_2x2_8[ y & 1 ][1];
954
dg2 = dither_2x2_4[ y & 1 ][1];
955
db2 = dither_2x2_8[(y & 1) ^ 1][1];
956
} else if (target == PIX_FMT_RGB555 || target == PIX_FMT_BGR555) {
957
dr1 = dither_2x2_8[ y & 1 ][0];
958
dg1 = dither_2x2_8[ y & 1 ][1];
959
db1 = dither_2x2_8[(y & 1) ^ 1][0];
960
dr2 = dither_2x2_8[ y & 1 ][1];
961
dg2 = dither_2x2_8[ y & 1 ][0];
962
db2 = dither_2x2_8[(y & 1) ^ 1][1];
964
dr1 = dither_4x4_16[ y & 3 ][0];
965
dg1 = dither_4x4_16[ y & 3 ][1];
966
db1 = dither_4x4_16[(y & 3) ^ 3][0];
967
dr2 = dither_4x4_16[ y & 3 ][1];
968
dg2 = dither_4x4_16[ y & 3 ][0];
969
db2 = dither_4x4_16[(y & 3) ^ 3][1];
972
dest[i * 2 + 0] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1];
973
dest[i * 2 + 1] = r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2];
974
} else /* 8/4-bit */ {
975
uint8_t *dest = (uint8_t *) _dest;
976
const uint8_t *r = (const uint8_t *) _r;
977
const uint8_t *g = (const uint8_t *) _g;
978
const uint8_t *b = (const uint8_t *) _b;
979
int dr1, dg1, db1, dr2, dg2, db2;
981
if (target == PIX_FMT_RGB8 || target == PIX_FMT_BGR8) {
982
const uint8_t * const d64 = dither_8x8_73[y & 7];
983
const uint8_t * const d32 = dither_8x8_32[y & 7];
984
dr1 = dg1 = d32[(i * 2 + 0) & 7];
985
db1 = d64[(i * 2 + 0) & 7];
986
dr2 = dg2 = d32[(i * 2 + 1) & 7];
987
db2 = d64[(i * 2 + 1) & 7];
989
const uint8_t * const d64 = dither_8x8_73 [y & 7];
990
const uint8_t * const d128 = dither_8x8_220[y & 7];
991
dr1 = db1 = d128[(i * 2 + 0) & 7];
992
dg1 = d64[(i * 2 + 0) & 7];
993
dr2 = db2 = d128[(i * 2 + 1) & 7];
994
dg2 = d64[(i * 2 + 1) & 7];
997
if (target == PIX_FMT_RGB4 || target == PIX_FMT_BGR4) {
998
dest[i] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1] +
999
((r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2]) << 4);
1001
dest[i * 2 + 0] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1];
1002
dest[i * 2 + 1] = r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2];
1007
static av_always_inline void
1008
yuv2rgb_X_c_template(SwsContext *c, const int16_t *lumFilter,
1009
const int16_t **lumSrc, int lumFilterSize,
1010
const int16_t *chrFilter, const int16_t **chrUSrc,
1011
const int16_t **chrVSrc, int chrFilterSize,
1012
const int16_t **alpSrc, uint8_t *dest, int dstW,
1013
int y, enum PixelFormat target, int hasAlpha)
1017
for (i = 0; i < (dstW >> 1); i++) {
1023
int av_unused A1, A2;
1024
const void *r, *g, *b;
1026
for (j = 0; j < lumFilterSize; j++) {
1027
Y1 += lumSrc[j][i * 2] * lumFilter[j];
1028
Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
1030
for (j = 0; j < chrFilterSize; j++) {
1031
U += chrUSrc[j][i] * chrFilter[j];
1032
V += chrVSrc[j][i] * chrFilter[j];
1038
if ((Y1 | Y2 | U | V) & 0x100) {
1039
Y1 = av_clip_uint8(Y1);
1040
Y2 = av_clip_uint8(Y2);
1041
U = av_clip_uint8(U);
1042
V = av_clip_uint8(V);
1047
for (j = 0; j < lumFilterSize; j++) {
1048
A1 += alpSrc[j][i * 2 ] * lumFilter[j];
1049
A2 += alpSrc[j][i * 2 + 1] * lumFilter[j];
1053
if ((A1 | A2) & 0x100) {
1054
A1 = av_clip_uint8(A1);
1055
A2 = av_clip_uint8(A2);
1059
/* FIXME fix tables so that clipping is not needed and then use _NOCLIP*/
1061
g = (c->table_gU[U] + c->table_gV[V]);
1064
yuv2rgb_write(dest, i, Y1, Y2, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1065
r, g, b, y, target, hasAlpha);
1069
static av_always_inline void
1070
yuv2rgb_2_c_template(SwsContext *c, const int16_t *buf[2],
1071
const int16_t *ubuf[2], const int16_t *vbuf[2],
1072
const int16_t *abuf[2], uint8_t *dest, int dstW,
1073
int yalpha, int uvalpha, int y,
1074
enum PixelFormat target, int hasAlpha)
1076
const int16_t *buf0 = buf[0], *buf1 = buf[1],
1077
*ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
1078
*vbuf0 = vbuf[0], *vbuf1 = vbuf[1],
1079
*abuf0 = hasAlpha ? abuf[0] : NULL,
1080
*abuf1 = hasAlpha ? abuf[1] : NULL;
1081
int yalpha1 = 4095 - yalpha;
1082
int uvalpha1 = 4095 - uvalpha;
1085
for (i = 0; i < (dstW >> 1); i++) {
1086
int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 19;
1087
int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 19;
1088
int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha) >> 19;
1089
int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha) >> 19;
1091
const void *r = c->table_rV[V],
1092
*g = (c->table_gU[U] + c->table_gV[V]),
1093
*b = c->table_bU[U];
1096
A1 = (abuf0[i * 2 ] * yalpha1 + abuf1[i * 2 ] * yalpha) >> 19;
1097
A2 = (abuf0[i * 2 + 1] * yalpha1 + abuf1[i * 2 + 1] * yalpha) >> 19;
1100
yuv2rgb_write(dest, i, Y1, Y2, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1101
r, g, b, y, target, hasAlpha);
1105
static av_always_inline void
1106
yuv2rgb_1_c_template(SwsContext *c, const int16_t *buf0,
1107
const int16_t *ubuf[2], const int16_t *vbuf[2],
1108
const int16_t *abuf0, uint8_t *dest, int dstW,
1109
int uvalpha, int y, enum PixelFormat target,
1112
const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
1113
*vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
1116
if (uvalpha < 2048) {
1117
for (i = 0; i < (dstW >> 1); i++) {
1118
int Y1 = buf0[i * 2] >> 7;
1119
int Y2 = buf0[i * 2 + 1] >> 7;
1120
int U = ubuf1[i] >> 7;
1121
int V = vbuf1[i] >> 7;
1123
const void *r = c->table_rV[V],
1124
*g = (c->table_gU[U] + c->table_gV[V]),
1125
*b = c->table_bU[U];
1128
A1 = abuf0[i * 2 ] >> 7;
1129
A2 = abuf0[i * 2 + 1] >> 7;
1132
yuv2rgb_write(dest, i, Y1, Y2, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1133
r, g, b, y, target, hasAlpha);
1136
for (i = 0; i < (dstW >> 1); i++) {
1137
int Y1 = buf0[i * 2] >> 7;
1138
int Y2 = buf0[i * 2 + 1] >> 7;
1139
int U = (ubuf0[i] + ubuf1[i]) >> 8;
1140
int V = (vbuf0[i] + vbuf1[i]) >> 8;
1142
const void *r = c->table_rV[V],
1143
*g = (c->table_gU[U] + c->table_gV[V]),
1144
*b = c->table_bU[U];
1147
A1 = abuf0[i * 2 ] >> 7;
1148
A2 = abuf0[i * 2 + 1] >> 7;
1151
yuv2rgb_write(dest, i, Y1, Y2, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1152
r, g, b, y, target, hasAlpha);
1157
#define YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha) \
1158
static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
1159
const int16_t **lumSrc, int lumFilterSize, \
1160
const int16_t *chrFilter, const int16_t **chrUSrc, \
1161
const int16_t **chrVSrc, int chrFilterSize, \
1162
const int16_t **alpSrc, uint8_t *dest, int dstW, \
1165
name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
1166
chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
1167
alpSrc, dest, dstW, y, fmt, hasAlpha); \
1169
#define YUV2RGBWRAPPER(name, base, ext, fmt, hasAlpha) \
1170
YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha) \
1171
static void name ## ext ## _2_c(SwsContext *c, const int16_t *buf[2], \
1172
const int16_t *ubuf[2], const int16_t *vbuf[2], \
1173
const int16_t *abuf[2], uint8_t *dest, int dstW, \
1174
int yalpha, int uvalpha, int y) \
1176
name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
1177
dest, dstW, yalpha, uvalpha, y, fmt, hasAlpha); \
1180
static void name ## ext ## _1_c(SwsContext *c, const int16_t *buf0, \
1181
const int16_t *ubuf[2], const int16_t *vbuf[2], \
1182
const int16_t *abuf0, uint8_t *dest, int dstW, \
1183
int uvalpha, int y) \
1185
name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \
1186
dstW, uvalpha, y, fmt, hasAlpha); \
1190
YUV2RGBWRAPPER(yuv2rgb,, 32_1, PIX_FMT_RGB32_1, CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
1191
YUV2RGBWRAPPER(yuv2rgb,, 32, PIX_FMT_RGB32, CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
1193
#if CONFIG_SWSCALE_ALPHA
1194
YUV2RGBWRAPPER(yuv2rgb,, a32_1, PIX_FMT_RGB32_1, 1)
1195
YUV2RGBWRAPPER(yuv2rgb,, a32, PIX_FMT_RGB32, 1)
1197
YUV2RGBWRAPPER(yuv2rgb,, x32_1, PIX_FMT_RGB32_1, 0)
1198
YUV2RGBWRAPPER(yuv2rgb,, x32, PIX_FMT_RGB32, 0)
1200
YUV2RGBWRAPPER(yuv2, rgb, rgb24, PIX_FMT_RGB24, 0)
1201
YUV2RGBWRAPPER(yuv2, rgb, bgr24, PIX_FMT_BGR24, 0)
1202
YUV2RGBWRAPPER(yuv2rgb,, 16, PIX_FMT_RGB565, 0)
1203
YUV2RGBWRAPPER(yuv2rgb,, 15, PIX_FMT_RGB555, 0)
1204
YUV2RGBWRAPPER(yuv2rgb,, 12, PIX_FMT_RGB444, 0)
1205
YUV2RGBWRAPPER(yuv2rgb,, 8, PIX_FMT_RGB8, 0)
1206
YUV2RGBWRAPPER(yuv2rgb,, 4, PIX_FMT_RGB4, 0)
1207
YUV2RGBWRAPPER(yuv2rgb,, 4b, PIX_FMT_RGB4_BYTE, 0)
1209
static av_always_inline void
1210
yuv2rgb_full_X_c_template(SwsContext *c, const int16_t *lumFilter,
1211
const int16_t **lumSrc, int lumFilterSize,
1212
const int16_t *chrFilter, const int16_t **chrUSrc,
1213
const int16_t **chrVSrc, int chrFilterSize,
1214
const int16_t **alpSrc, uint8_t *dest,
1215
int dstW, int y, enum PixelFormat target, int hasAlpha)
1218
int step = (target == PIX_FMT_RGB24 || target == PIX_FMT_BGR24) ? 3 : 4;
1220
for (i = 0; i < dstW; i++) {
1228
for (j = 0; j < lumFilterSize; j++) {
1229
Y += lumSrc[j][i] * lumFilter[j];
1231
for (j = 0; j < chrFilterSize; j++) {
1232
U += chrUSrc[j][i] * chrFilter[j];
1233
V += chrVSrc[j][i] * chrFilter[j];
1240
for (j = 0; j < lumFilterSize; j++) {
1241
A += alpSrc[j][i] * lumFilter[j];
1245
A = av_clip_uint8(A);
1247
Y -= c->yuv2rgb_y_offset;
1248
Y *= c->yuv2rgb_y_coeff;
1250
R = Y + V*c->yuv2rgb_v2r_coeff;
1251
G = Y + V*c->yuv2rgb_v2g_coeff + U*c->yuv2rgb_u2g_coeff;
1252
B = Y + U*c->yuv2rgb_u2b_coeff;
1253
if ((R | G | B) & 0xC0000000) {
1254
R = av_clip_uintp2(R, 30);
1255
G = av_clip_uintp2(G, 30);
1256
B = av_clip_uintp2(B, 30);
1261
dest[0] = hasAlpha ? A : 255;
1275
dest[3] = hasAlpha ? A : 255;
1278
dest[0] = hasAlpha ? A : 255;
1293
dest[3] = hasAlpha ? A : 255;
1301
YUV2RGBWRAPPERX(yuv2, rgb_full, bgra32_full, PIX_FMT_BGRA, CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
1302
YUV2RGBWRAPPERX(yuv2, rgb_full, abgr32_full, PIX_FMT_ABGR, CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
1303
YUV2RGBWRAPPERX(yuv2, rgb_full, rgba32_full, PIX_FMT_RGBA, CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
1304
YUV2RGBWRAPPERX(yuv2, rgb_full, argb32_full, PIX_FMT_ARGB, CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
1306
#if CONFIG_SWSCALE_ALPHA
1307
YUV2RGBWRAPPERX(yuv2, rgb_full, bgra32_full, PIX_FMT_BGRA, 1)
1308
YUV2RGBWRAPPERX(yuv2, rgb_full, abgr32_full, PIX_FMT_ABGR, 1)
1309
YUV2RGBWRAPPERX(yuv2, rgb_full, rgba32_full, PIX_FMT_RGBA, 1)
1310
YUV2RGBWRAPPERX(yuv2, rgb_full, argb32_full, PIX_FMT_ARGB, 1)
1312
YUV2RGBWRAPPERX(yuv2, rgb_full, bgrx32_full, PIX_FMT_BGRA, 0)
1313
YUV2RGBWRAPPERX(yuv2, rgb_full, xbgr32_full, PIX_FMT_ABGR, 0)
1314
YUV2RGBWRAPPERX(yuv2, rgb_full, rgbx32_full, PIX_FMT_RGBA, 0)
1315
YUV2RGBWRAPPERX(yuv2, rgb_full, xrgb32_full, PIX_FMT_ARGB, 0)
1317
YUV2RGBWRAPPERX(yuv2, rgb_full, bgr24_full, PIX_FMT_BGR24, 0)
1318
YUV2RGBWRAPPERX(yuv2, rgb_full, rgb24_full, PIX_FMT_RGB24, 0)
1320
static av_always_inline void fillPlane(uint8_t* plane, int stride,
1321
int width, int height,
1325
uint8_t *ptr = plane + stride*y;
1326
for (i=0; i<height; i++) {
39
{ 36, 68, 60, 92, 34, 66, 58, 90, },
40
{ 100, 4, 124, 28, 98, 2, 122, 26, },
41
{ 52, 84, 44, 76, 50, 82, 42, 74, },
42
{ 116, 20, 108, 12, 114, 18, 106, 10, },
43
{ 32, 64, 56, 88, 38, 70, 62, 94, },
44
{ 96, 0, 120, 24, 102, 6, 126, 30, },
45
{ 48, 80, 40, 72, 54, 86, 46, 78, },
46
{ 112, 16, 104, 8, 118, 22, 110, 14, },
49
DECLARE_ALIGNED(8, const uint8_t, ff_sws_pb_64)[8] = {
50
64, 64, 64, 64, 64, 64, 64, 64
53
static av_always_inline void fillPlane(uint8_t *plane, int stride, int width,
54
int height, int y, uint8_t val)
57
uint8_t *ptr = plane + stride * y;
58
for (i = 0; i < height; i++) {
1327
59
memset(ptr, val, width);
1332
#define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
1334
#define r ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? b_r : r_b)
1335
#define b ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? r_b : b_r)
1337
static av_always_inline void
1338
rgb48ToY_c_template(uint16_t *dst, const uint16_t *src, int width,
1339
enum PixelFormat origin)
1342
for (i = 0; i < width; i++) {
1343
unsigned int r_b = input_pixel(&src[i*3+0]);
1344
unsigned int g = input_pixel(&src[i*3+1]);
1345
unsigned int b_r = input_pixel(&src[i*3+2]);
1347
dst[i] = (RY*r + GY*g + BY*b + (0x2001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
1351
static av_always_inline void
1352
rgb48ToUV_c_template(uint16_t *dstU, uint16_t *dstV,
1353
const uint16_t *src1, const uint16_t *src2,
1354
int width, enum PixelFormat origin)
1358
for (i = 0; i < width; i++) {
1359
int r_b = input_pixel(&src1[i*3+0]);
1360
int g = input_pixel(&src1[i*3+1]);
1361
int b_r = input_pixel(&src1[i*3+2]);
1363
dstU[i] = (RU*r + GU*g + BU*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
1364
dstV[i] = (RV*r + GV*g + BV*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
1368
static av_always_inline void
1369
rgb48ToUV_half_c_template(uint16_t *dstU, uint16_t *dstV,
1370
const uint16_t *src1, const uint16_t *src2,
1371
int width, enum PixelFormat origin)
1375
for (i = 0; i < width; i++) {
1376
int r_b = (input_pixel(&src1[6 * i + 0]) + input_pixel(&src1[6 * i + 3]) + 1) >> 1;
1377
int g = (input_pixel(&src1[6 * i + 1]) + input_pixel(&src1[6 * i + 4]) + 1) >> 1;
1378
int b_r = (input_pixel(&src1[6 * i + 2]) + input_pixel(&src1[6 * i + 5]) + 1) >> 1;
1380
dstU[i]= (RU*r + GU*g + BU*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
1381
dstV[i]= (RV*r + GV*g + BV*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
1389
#define rgb48funcs(pattern, BE_LE, origin) \
1390
static void pattern ## 48 ## BE_LE ## ToY_c(uint8_t *_dst, const uint8_t *_src, \
1391
int width, uint32_t *unused) \
1393
const uint16_t *src = (const uint16_t *) _src; \
1394
uint16_t *dst = (uint16_t *) _dst; \
1395
rgb48ToY_c_template(dst, src, width, origin); \
1398
static void pattern ## 48 ## BE_LE ## ToUV_c(uint8_t *_dstU, uint8_t *_dstV, \
1399
const uint8_t *_src1, const uint8_t *_src2, \
1400
int width, uint32_t *unused) \
1402
const uint16_t *src1 = (const uint16_t *) _src1, \
1403
*src2 = (const uint16_t *) _src2; \
1404
uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \
1405
rgb48ToUV_c_template(dstU, dstV, src1, src2, width, origin); \
1408
static void pattern ## 48 ## BE_LE ## ToUV_half_c(uint8_t *_dstU, uint8_t *_dstV, \
1409
const uint8_t *_src1, const uint8_t *_src2, \
1410
int width, uint32_t *unused) \
1412
const uint16_t *src1 = (const uint16_t *) _src1, \
1413
*src2 = (const uint16_t *) _src2; \
1414
uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \
1415
rgb48ToUV_half_c_template(dstU, dstV, src1, src2, width, origin); \
1418
rgb48funcs(rgb, LE, PIX_FMT_RGB48LE)
1419
rgb48funcs(rgb, BE, PIX_FMT_RGB48BE)
1420
rgb48funcs(bgr, LE, PIX_FMT_BGR48LE)
1421
rgb48funcs(bgr, BE, PIX_FMT_BGR48BE)
1423
#define input_pixel(i) ((origin == PIX_FMT_RGBA || origin == PIX_FMT_BGRA || \
1424
origin == PIX_FMT_ARGB || origin == PIX_FMT_ABGR) ? AV_RN32A(&src[(i)*4]) : \
1425
(isBE(origin) ? AV_RB16(&src[(i)*2]) : AV_RL16(&src[(i)*2])))
1427
static av_always_inline void
1428
rgb16_32ToY_c_template(uint8_t *dst, const uint8_t *src,
1429
int width, enum PixelFormat origin,
1430
int shr, int shg, int shb, int shp,
1431
int maskr, int maskg, int maskb,
1432
int rsh, int gsh, int bsh, int S)
1434
const int ry = RY << rsh, gy = GY << gsh, by = BY << bsh;
1435
const unsigned rnd = 33u << (S - 1);
1438
for (i = 0; i < width; i++) {
1439
int px = input_pixel(i) >> shp;
1440
int b = (px & maskb) >> shb;
1441
int g = (px & maskg) >> shg;
1442
int r = (px & maskr) >> shr;
1444
dst[i] = (ry * r + gy * g + by * b + rnd) >> S;
1448
static av_always_inline void
1449
rgb16_32ToUV_c_template(uint8_t *dstU, uint8_t *dstV,
1450
const uint8_t *src, int width,
1451
enum PixelFormat origin,
1452
int shr, int shg, int shb, int shp,
1453
int maskr, int maskg, int maskb,
1454
int rsh, int gsh, int bsh, int S)
1456
const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh,
1457
rv = RV << rsh, gv = GV << gsh, bv = BV << bsh;
1458
const unsigned rnd = 257u << (S - 1);
1461
for (i = 0; i < width; i++) {
1462
int px = input_pixel(i) >> shp;
1463
int b = (px & maskb) >> shb;
1464
int g = (px & maskg) >> shg;
1465
int r = (px & maskr) >> shr;
1467
dstU[i] = (ru * r + gu * g + bu * b + rnd) >> S;
1468
dstV[i] = (rv * r + gv * g + bv * b + rnd) >> S;
1472
static av_always_inline void
1473
rgb16_32ToUV_half_c_template(uint8_t *dstU, uint8_t *dstV,
1474
const uint8_t *src, int width,
1475
enum PixelFormat origin,
1476
int shr, int shg, int shb, int shp,
1477
int maskr, int maskg, int maskb,
1478
int rsh, int gsh, int bsh, int S)
1480
const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh,
1481
rv = RV << rsh, gv = GV << gsh, bv = BV << bsh,
1482
maskgx = ~(maskr | maskb);
1483
const unsigned rnd = 257u << S;
1486
maskr |= maskr << 1; maskb |= maskb << 1; maskg |= maskg << 1;
1487
for (i = 0; i < width; i++) {
1488
int px0 = input_pixel(2 * i + 0) >> shp;
1489
int px1 = input_pixel(2 * i + 1) >> shp;
1490
int b, r, g = (px0 & maskgx) + (px1 & maskgx);
1491
int rb = px0 + px1 - g;
1493
b = (rb & maskb) >> shb;
1494
if (shp || origin == PIX_FMT_BGR565LE || origin == PIX_FMT_BGR565BE ||
1495
origin == PIX_FMT_RGB565LE || origin == PIX_FMT_RGB565BE) {
1498
g = (g & maskg) >> shg;
1500
r = (rb & maskr) >> shr;
1502
dstU[i] = (ru * r + gu * g + bu * b + rnd) >> (S + 1);
1503
dstV[i] = (rv * r + gv * g + bv * b + rnd) >> (S + 1);
1509
#define rgb16_32_wrapper(fmt, name, shr, shg, shb, shp, maskr, \
1510
maskg, maskb, rsh, gsh, bsh, S) \
1511
static void name ## ToY_c(uint8_t *dst, const uint8_t *src, \
1512
int width, uint32_t *unused) \
1514
rgb16_32ToY_c_template(dst, src, width, fmt, shr, shg, shb, shp, \
1515
maskr, maskg, maskb, rsh, gsh, bsh, S); \
1518
static void name ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
1519
const uint8_t *src, const uint8_t *dummy, \
1520
int width, uint32_t *unused) \
1522
rgb16_32ToUV_c_template(dstU, dstV, src, width, fmt, shr, shg, shb, shp, \
1523
maskr, maskg, maskb, rsh, gsh, bsh, S); \
1526
static void name ## ToUV_half_c(uint8_t *dstU, uint8_t *dstV, \
1527
const uint8_t *src, const uint8_t *dummy, \
1528
int width, uint32_t *unused) \
1530
rgb16_32ToUV_half_c_template(dstU, dstV, src, width, fmt, shr, shg, shb, shp, \
1531
maskr, maskg, maskb, rsh, gsh, bsh, S); \
1534
rgb16_32_wrapper(PIX_FMT_BGR32, bgr32, 16, 0, 0, 0, 0xFF0000, 0xFF00, 0x00FF, 8, 0, 8, RGB2YUV_SHIFT+8)
1535
rgb16_32_wrapper(PIX_FMT_BGR32_1, bgr321, 16, 0, 0, 8, 0xFF0000, 0xFF00, 0x00FF, 8, 0, 8, RGB2YUV_SHIFT+8)
1536
rgb16_32_wrapper(PIX_FMT_RGB32, rgb32, 0, 0, 16, 0, 0x00FF, 0xFF00, 0xFF0000, 8, 0, 8, RGB2YUV_SHIFT+8)
1537
rgb16_32_wrapper(PIX_FMT_RGB32_1, rgb321, 0, 0, 16, 8, 0x00FF, 0xFF00, 0xFF0000, 8, 0, 8, RGB2YUV_SHIFT+8)
1538
rgb16_32_wrapper(PIX_FMT_BGR565LE, bgr16le, 0, 0, 0, 0, 0x001F, 0x07E0, 0xF800, 11, 5, 0, RGB2YUV_SHIFT+8)
1539
rgb16_32_wrapper(PIX_FMT_BGR555LE, bgr15le, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, 10, 5, 0, RGB2YUV_SHIFT+7)
1540
rgb16_32_wrapper(PIX_FMT_BGR444LE, bgr12le, 0, 0, 0, 0, 0x000F, 0x00F0, 0x0F00, 8, 4, 0, RGB2YUV_SHIFT+4)
1541
rgb16_32_wrapper(PIX_FMT_RGB565LE, rgb16le, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, 0, 5, 11, RGB2YUV_SHIFT+8)
1542
rgb16_32_wrapper(PIX_FMT_RGB555LE, rgb15le, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, 0, 5, 10, RGB2YUV_SHIFT+7)
1543
rgb16_32_wrapper(PIX_FMT_RGB444LE, rgb12le, 0, 0, 0, 0, 0x0F00, 0x00F0, 0x000F, 0, 4, 8, RGB2YUV_SHIFT+4)
1544
rgb16_32_wrapper(PIX_FMT_BGR565BE, bgr16be, 0, 0, 0, 0, 0x001F, 0x07E0, 0xF800, 11, 5, 0, RGB2YUV_SHIFT+8)
1545
rgb16_32_wrapper(PIX_FMT_BGR555BE, bgr15be, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, 10, 5, 0, RGB2YUV_SHIFT+7)
1546
rgb16_32_wrapper(PIX_FMT_BGR444BE, bgr12be, 0, 0, 0, 0, 0x000F, 0x00F0, 0x0F00, 8, 4, 0, RGB2YUV_SHIFT+4)
1547
rgb16_32_wrapper(PIX_FMT_RGB565BE, rgb16be, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, 0, 5, 11, RGB2YUV_SHIFT+8)
1548
rgb16_32_wrapper(PIX_FMT_RGB555BE, rgb15be, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, 0, 5, 10, RGB2YUV_SHIFT+7)
1549
rgb16_32_wrapper(PIX_FMT_RGB444BE, rgb12be, 0, 0, 0, 0, 0x0F00, 0x00F0, 0x000F, 0, 4, 8, RGB2YUV_SHIFT+4)
1551
static void abgrToA_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
1554
for (i=0; i<width; i++) {
1559
static void rgbaToA_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
1562
for (i=0; i<width; i++) {
1567
static void palToY_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *pal)
1570
for (i=0; i<width; i++) {
1573
dst[i]= pal[d] & 0xFF;
1577
static void palToUV_c(uint8_t *dstU, uint8_t *dstV,
1578
const uint8_t *src1, const uint8_t *src2,
1579
int width, uint32_t *pal)
1582
assert(src1 == src2);
1583
for (i=0; i<width; i++) {
1584
int p= pal[src1[i]];
1591
static void monowhite2Y_c(uint8_t *dst, const uint8_t *src,
1592
int width, uint32_t *unused)
1595
for (i=0; i<width/8; i++) {
1598
dst[8*i+j]= ((d>>(7-j))&1)*255;
1602
static void monoblack2Y_c(uint8_t *dst, const uint8_t *src,
1603
int width, uint32_t *unused)
1606
for (i=0; i<width/8; i++) {
1609
dst[8*i+j]= ((d>>(7-j))&1)*255;
1613
//FIXME yuy2* can read up to 7 samples too much
1615
static void yuy2ToY_c(uint8_t *dst, const uint8_t *src, int width,
1619
for (i=0; i<width; i++)
1623
static void yuy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1624
const uint8_t *src2, int width, uint32_t *unused)
1627
for (i=0; i<width; i++) {
1628
dstU[i]= src1[4*i + 1];
1629
dstV[i]= src1[4*i + 3];
1631
assert(src1 == src2);
1634
static void bswap16Y_c(uint8_t *_dst, const uint8_t *_src, int width, uint32_t *unused)
1637
const uint16_t *src = (const uint16_t *) _src;
1638
uint16_t *dst = (uint16_t *) _dst;
1639
for (i=0; i<width; i++) {
1640
dst[i] = av_bswap16(src[i]);
1644
static void bswap16UV_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *_src1,
1645
const uint8_t *_src2, int width, uint32_t *unused)
1648
const uint16_t *src1 = (const uint16_t *) _src1,
1649
*src2 = (const uint16_t *) _src2;
1650
uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV;
1651
for (i=0; i<width; i++) {
1652
dstU[i] = av_bswap16(src1[i]);
1653
dstV[i] = av_bswap16(src2[i]);
1657
/* This is almost identical to the previous, end exists only because
1658
* yuy2ToY/UV)(dst, src+1, ...) would have 100% unaligned accesses. */
1659
static void uyvyToY_c(uint8_t *dst, const uint8_t *src, int width,
1663
for (i=0; i<width; i++)
1667
static void uyvyToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1668
const uint8_t *src2, int width, uint32_t *unused)
1671
for (i=0; i<width; i++) {
1672
dstU[i]= src1[4*i + 0];
1673
dstV[i]= src1[4*i + 2];
1675
assert(src1 == src2);
1678
static av_always_inline void nvXXtoUV_c(uint8_t *dst1, uint8_t *dst2,
1679
const uint8_t *src, int width)
1682
for (i = 0; i < width; i++) {
1683
dst1[i] = src[2*i+0];
1684
dst2[i] = src[2*i+1];
1688
static void nv12ToUV_c(uint8_t *dstU, uint8_t *dstV,
1689
const uint8_t *src1, const uint8_t *src2,
1690
int width, uint32_t *unused)
1692
nvXXtoUV_c(dstU, dstV, src1, width);
1695
static void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV,
1696
const uint8_t *src1, const uint8_t *src2,
1697
int width, uint32_t *unused)
1699
nvXXtoUV_c(dstV, dstU, src1, width);
1702
#define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
1704
static void bgr24ToY_c(uint8_t *dst, const uint8_t *src,
1705
int width, uint32_t *unused)
1708
for (i=0; i<width; i++) {
1713
dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
1717
static void bgr24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1718
const uint8_t *src2, int width, uint32_t *unused)
1721
for (i=0; i<width; i++) {
1722
int b= src1[3*i + 0];
1723
int g= src1[3*i + 1];
1724
int r= src1[3*i + 2];
1726
dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1727
dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1729
assert(src1 == src2);
1732
static void bgr24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1733
const uint8_t *src2, int width, uint32_t *unused)
1736
for (i=0; i<width; i++) {
1737
int b= src1[6*i + 0] + src1[6*i + 3];
1738
int g= src1[6*i + 1] + src1[6*i + 4];
1739
int r= src1[6*i + 2] + src1[6*i + 5];
1741
dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1742
dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1744
assert(src1 == src2);
1747
static void rgb24ToY_c(uint8_t *dst, const uint8_t *src, int width,
1751
for (i=0; i<width; i++) {
1756
dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
1760
static void rgb24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1761
const uint8_t *src2, int width, uint32_t *unused)
1765
for (i=0; i<width; i++) {
1766
int r= src1[3*i + 0];
1767
int g= src1[3*i + 1];
1768
int b= src1[3*i + 2];
1770
dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1771
dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1775
static void rgb24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1776
const uint8_t *src2, int width, uint32_t *unused)
1780
for (i=0; i<width; i++) {
1781
int r= src1[6*i + 0] + src1[6*i + 3];
1782
int g= src1[6*i + 1] + src1[6*i + 4];
1783
int b= src1[6*i + 2] + src1[6*i + 5];
1785
dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1786
dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1790
static void planar_rgb_to_y(uint8_t *dst, const uint8_t *src[4], int width)
1793
for (i = 0; i < width; i++) {
1798
dst[i] = ((RY * r + GY * g + BY * b + (33 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT);
1802
static void planar_rgb16le_to_y(uint8_t *_dst, const uint8_t *_src[4], int width)
1805
const uint16_t **src = (const uint16_t **) _src;
1806
uint16_t *dst = (uint16_t *) _dst;
1807
for (i = 0; i < width; i++) {
1808
int g = AV_RL16(src[0] + i);
1809
int b = AV_RL16(src[1] + i);
1810
int r = AV_RL16(src[2] + i);
1812
dst[i] = ((RY * r + GY * g + BY * b + (33 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT);
1816
static void planar_rgb16be_to_y(uint8_t *_dst, const uint8_t *_src[4], int width)
1819
const uint16_t **src = (const uint16_t **) _src;
1820
uint16_t *dst = (uint16_t *) _dst;
1821
for (i = 0; i < width; i++) {
1822
int g = AV_RB16(src[0] + i);
1823
int b = AV_RB16(src[1] + i);
1824
int r = AV_RB16(src[2] + i);
1826
dst[i] = ((RY * r + GY * g + BY * b + (33 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT);
1830
static void planar_rgb_to_uv(uint8_t *dstU, uint8_t *dstV, const uint8_t *src[4], int width)
1833
for (i = 0; i < width; i++) {
1838
dstU[i] = (RU * r + GU * g + BU * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
1839
dstV[i] = (RV * r + GV * g + BV * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
1843
static void planar_rgb16le_to_uv(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *_src[4], int width)
1846
const uint16_t **src = (const uint16_t **) _src;
1847
uint16_t *dstU = (uint16_t *) _dstU;
1848
uint16_t *dstV = (uint16_t *) _dstV;
1849
for (i = 0; i < width; i++) {
1850
int g = AV_RL16(src[0] + i);
1851
int b = AV_RL16(src[1] + i);
1852
int r = AV_RL16(src[2] + i);
1854
dstU[i] = (RU * r + GU * g + BU * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
1855
dstV[i] = (RV * r + GV * g + BV * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
1859
static void planar_rgb16be_to_uv(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *_src[4], int width)
1862
const uint16_t **src = (const uint16_t **) _src;
1863
uint16_t *dstU = (uint16_t *) _dstU;
1864
uint16_t *dstV = (uint16_t *) _dstV;
1865
for (i = 0; i < width; i++) {
1866
int g = AV_RB16(src[0] + i);
1867
int b = AV_RB16(src[1] + i);
1868
int r = AV_RB16(src[2] + i);
1870
dstU[i] = (RU * r + GU * g + BU * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
1871
dstV[i] = (RV * r + GV * g + BV * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
1875
static void hScale16To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *_src,
1876
const int16_t *filter,
64
static void fill_plane9or10(uint8_t *plane, int stride, int width,
65
int height, int y, uint8_t val,
66
const int dst_depth, const int big_endian)
69
uint16_t *dst = (uint16_t *) (plane + stride * y);
70
#define FILL8TO9_OR_10(wfunc) \
71
for (i = 0; i < height; i++) { \
72
for (j = 0; j < width; j++) { \
73
wfunc(&dst[j], (val << (dst_depth - 8)) | \
74
(val >> (16 - dst_depth))); \
79
FILL8TO9_OR_10(AV_WB16);
81
FILL8TO9_OR_10(AV_WL16);
86
static void hScale16To19_c(SwsContext *c, int16_t *_dst, int dstW,
87
const uint8_t *_src, const int16_t *filter,
1877
88
const int32_t *filterPos, int filterSize)
90
const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(c->srcFormat);
1880
int32_t *dst = (int32_t *) _dst;
92
int32_t *dst = (int32_t *) _dst;
1881
93
const uint16_t *src = (const uint16_t *) _src;
1882
int bits = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
94
int bits = desc->comp[0].depth_minus1;
1885
97
for (i = 0; i < dstW; i++) {
1887
99
int srcPos = filterPos[i];
1890
102
for (j = 0; j < filterSize; j++) {
1891
103
val += src[srcPos + j] * filter[filterSize * i + j];